\input{../slidesComun}

\title[7. Orthogonality and least squares]{Chapter 7. Orthogonality and least squares}  
\COSS

% ==============================================
\begin{frame}\frametitle{References} 

\begin{figure}
	\includegraphics[scale=0.7]{../lay_linearalgebra.jpg}
\end{figure}
D. Lay. Linear algebra and its applications (3rd ed). Pearson (2006). Chapter 6.

\end{frame}

% ==============================================
\begin{frame}\frametitle{A little bit of history} 

Least squares was first used to solve problems in geodesy (\href{http://en.wikipedia.org/wiki/Adrien-Marie_Legendre}{Adrien-Marie Legendre}, 1805) and astronomy (\href{http://en.wikipedia.org/wiki/Carl_Friedrich_Gauss}{Carl Friedrich Gauss}, 1809). Gauss made the connection between this method and the distribution of measurement errors. Today it is one
of the best understood and most widely used methods.

\begin{figure}
	\includegraphics[height=3cm]{Legendre.jpg}
	\includegraphics[height=3cm]{../Tema2/Carl_Friedrich_Gauss.jpg}
\end{figure}

\end{frame}

% ==============================================
\begin{frame}\frametitle{Applications} 
In this example, least squares is used to plan radiation therapy.
\begin{center}
	\includegraphics[height=5cm]{figILSApplication.jpg}
\end{center}
\begin{tiny}
Bedford, J. L. \textit{Sinogram analysis of aperture optimization by iterative least-squares in volumetric modulated arc therapy}. Physics in Medicine and Biology, \textbf{2013}, 58, 1235-1250
\end{tiny}

\end{frame}

% ==============================================
\begin{frame}\frametitle{Applications} 
Traditionally, control applications were formulated in a least-squares setup. Nowadays they use more sophisticated goal functions that can be regarded as evolved versions of least squares.
\begin{center}
	\includegraphics[height=5cm]{figArm.jpg}
\end{center}
\end{frame}

% ==============================================
\setnextsection{7}
\section{Orthogonality and least squares} 
\subsection{Inner product, length and orthogonality (a)} 
\Outline

\begin{frame}\frametitle{Inner product} 
\begin{ceudef}[Inner product or dot product]
	Let $\mathbf{u},\mathbf{v}\in\mathbb{R}^n$ be two vectors. The \textbf{inner product} or \textbf{dot product} between these two vectors is defined as
	\begin{center}
		$\mathbf{u}\cdot\mathbf{v}=\left<\mathbf{u},\mathbf{v}\right>\triangleq \sum\limits_{i=1}^n{u_iv_i}$
	\end{center}
\end{ceudef}

\begin{ceuthm}
	If we consider $\mathbf{u}$ and $\mathbf{v}$ to be column vectors ($\in\mathcal{M}_{n\times 1}$), then 
	\begin{center}
		$\mathbf{u}\cdot\mathbf{v}=\mathbf{u}^T\mathbf{v}$
	\end{center}
\end{ceuthm}

\begin{exampleblock}{Example}
	Let $\mathbf{u}=(2,-5,-1)$ and $\mathbf{v}=(3,2,-3)$.
	\begin{center}
		$\mathbf{u}\cdot\mathbf{v}=2\cdot 3 +(-5)\cdot 2+(-1)\cdot (-3)=-1$
	\end{center}
\end{exampleblock}

\end{frame}

% ==============================================
\begin{frame}\frametitle{Inner product} 
\begin{ceuthm}
	For any three vectors $\mathbf{u},\mathbf{v},\mathbf{w}\in\mathbb{R}^n$ and any scalar $r\in\mathbb{R}$ it is verified that
	\begin{enumerate}
		\item $\mathbf{u}\cdot\mathbf{v}=\mathbf{v}\cdot\mathbf{u}$
		\item $(\mathbf{u}+\mathbf{v})\cdot\mathbf{w}=\mathbf{u}\cdot\mathbf{w}+\mathbf{v}\cdot\mathbf{w}$
		\item $(r\mathbf{u})\cdot\mathbf{v}=r(\mathbf{u}\cdot\mathbf{v})=\mathbf{u}\cdot(r\mathbf{v})$
		\item $\mathbf{u}\cdot\mathbf{u}\geq 0$
		\item $\mathbf{u}\cdot\mathbf{u}=0 \Leftrightarrow \mathbf{u}=\mathbf{0}$
	\end{enumerate}
\end{ceuthm}

\begin{block}{Corollary}
	\begin{center}
		$(r_1\mathbf{u}_1+r_2\mathbf{u}_2+...+r_p\mathbf{u}_p)\cdot\mathbf{v}=r_1(\mathbf{u}_1\cdot\mathbf{v})+r_2(\mathbf{u}_2\cdot\mathbf{v})+...+r_p(\mathbf{u}_p\cdot\mathbf{v})$
	\end{center}
\end{block}

\end{frame}
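
% ==============================================
\begin{frame}[fragile]\frametitle{Inner product}
A quick numerical check of the previous example (a minimal NumPy sketch, not part of Lay's text; the variable names are ours):
\begin{verbatim}
import numpy as np

u = np.array([2, -5, -1])
v = np.array([3, 2, -3])

# Inner product as a sum of componentwise products
print(np.dot(u, v))   # -1
# Equivalent matrix formulation u^T v
print(u.T @ v)        # -1
# Symmetry: u.v = v.u
print(np.dot(v, u))   # -1
\end{verbatim}
\end{frame}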

% ==============================================
\begin{frame}\frametitle{Length} 
\begin{ceudef}[Length of a vector]
	Given any vector $\mathbf{v}$, its length is defined as
	\begin{center}
		$\|\mathbf{v}\|\triangleq\sqrt{\mathbf{v}\cdot\mathbf{v}}$
	\end{center}
\end{ceudef}
\begin{ceuthm}
	Given any vector $\mathbf{v}\in\mathbb{R}^n$
	\begin{center}
		$\|\mathbf{v}\|=\sqrt{v_1^2+v_2^2+...+v_n^2}$
	\end{center}
\end{ceuthm}

\begin{exampleblock}{Example}
	The length of $\mathbf{v}=(1,-2,2,0)$ is 
	\begin{center}
		$\|\mathbf{v}\|=\sqrt{1^2+(-2)^2+2^2+0^2}=3$
	\end{center}
\end{exampleblock}

\end{frame}

% ==============================================
\begin{frame}\frametitle{Length} 
\begin{ceuthm}
	For any vector $\mathbf{v}$ and any scalar $r$ it is verified that
	\begin{center}
		$\|r\mathbf{v}\|=|r|\|\mathbf{v}\|$
	\end{center}
	\underline{\textit{Proof}}\\
	It will be given only for $\mathbf{v}\in\mathbb{R}^n$:
	\begin{center}
		$\begin{array}{rcl}
			\|r\mathbf{v}\|&=&\sqrt{(rv_1)^2+(rv_2)^2+...+(rv_n)^2}=\sqrt{r^2(v_1^2+v_2^2+...+v_n^2)}\\
			  &=&\sqrt{r^2}\sqrt{v_1^2+v_2^2+...+v_n^2}=|r|\|\mathbf{v}\|
		\end{array}$(q.e.d.)
	\end{center}
\end{ceuthm}

\begin{exampleblock}{Example (continued)}
	Find a vector of unit length that has the same direction as $\mathbf{v}=(1,-2,2,0)$.\\
	\underline{\textit{Solution}}\\
	\begin{center}
		$\mathbf{u}_\mathbf{v}=\frac{\mathbf{v}}{\|\mathbf{v}\|}=\left(\frac{1}{3},-\frac{2}{3},\frac{2}{3},0\right) \Rightarrow
			\|\mathbf{u}_\mathbf{v}\|=\sqrt{\frac{1}{9}+\frac{4}{9}+\frac{4}{9}+0}=1$
	\end{center}
\end{exampleblock}

\end{frame}
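
% ==============================================
\begin{frame}[fragile]\frametitle{Length}
The length and normalization computations above can be reproduced numerically (a minimal NumPy sketch under our own variable names):
\begin{verbatim}
import numpy as np

v = np.array([1, -2, 2, 0])

length = np.linalg.norm(v)   # sqrt(1+4+4+0) = 3.0
u_v = v / length             # approx. [0.333 -0.667 0.667 0.]
# Both lengths, up to rounding: 3.0 and 1.0
print(length, np.linalg.norm(u_v))
\end{verbatim}
\end{frame}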

% ==============================================
\begin{frame}\frametitle{Distance} 
\begin{ceudef}[Distance in $\mathbb{R}$]
	The distance between any two numbers $a,b\in\mathbb{R}$ can be defined as
	\begin{center}
		$d(a,b)=|a-b|$
	\end{center}
\end{ceudef}

\begin{exampleblock}{Example}
  Calculate the distance between 2 and 8 as well as between -3 and 4.
	\begin{center}
		\includegraphics[scale=0.45]{figDistance.jpg}
	\end{center}
\end{exampleblock}

\end{frame}

% ==============================================
\begin{frame}\frametitle{Distance} 
\begin{ceudef}[Distance in $\mathbb{R}^n$]
	The distance between any two vectors $\mathbf{u},\mathbf{v}\in\mathbb{R}^n$ can be defined as
	\begin{center}
		$d(\mathbf{u},\mathbf{v})=\|\mathbf{u}-\mathbf{v}\|$
	\end{center}
\end{ceudef}

\begin{exampleblock}{Example}
  Calculate the distance between $\mathbf{u}=(7,1)$ and $\mathbf{v}=(3,2)$
	\begin{center}
		$d(\mathbf{u},\mathbf{v})=\|(7,1)-(3,2)\|=\|(4,-1)\|=\sqrt{4^2+(-1)^2}=\sqrt{17}$
		\includegraphics[scale=0.4]{figDistance2.jpg}
	\end{center}
\end{exampleblock}

\end{frame}

% ==============================================
\begin{frame}\frametitle{Distance} 
\begin{exampleblock}{Example}
  For any two vectors in $\mathbb{R}^3$, $\mathbf{u}$ and $\mathbf{v}$, the distance can be calculated through
	\begin{center}
		$d(\mathbf{u},\mathbf{v})=\|\mathbf{u}-\mathbf{v}\|=\|(u_1-v_1,u_2-v_2,u_3-v_3)\|=\sqrt{(u_1-v_1)^2+(u_2-v_2)^2+(u_3-v_3)^2}$
	\end{center}
\end{exampleblock}

\end{frame}

% ==============================================
\begin{frame}\frametitle{Orthogonality} 
\begin{exampleblock}{Example}
  Any two vectors in $\mathbb{R}^2$, $\mathbf{u}$ and $\mathbf{v}$, are orthogonal if $d(\mathbf{u},\mathbf{v})=d(\mathbf{u},\mathbf{-v})$
	\begin{center}
		\includegraphics[scale=0.4]{figOrthogonality2D.jpg}\\
		$d^2(\mathbf{u},\mathbf{v})=\|\mathbf{u}-\mathbf{v}\|^2=(\mathbf{u}-\mathbf{v})\cdot(\mathbf{u}-\mathbf{v})=
		    \mathbf{u}\cdot\mathbf{u}+\mathbf{v}\cdot\mathbf{v}-2\mathbf{u}\cdot\mathbf{v}=\|\mathbf{u}\|^2+\|\mathbf{v}\|^2-2\mathbf{u}\cdot\mathbf{v}$\\
		$d^2(\mathbf{u},-\mathbf{v})=\|\mathbf{u}+\mathbf{v}\|^2=(\mathbf{u}+\mathbf{v})\cdot(\mathbf{u}+\mathbf{v})=
		    \mathbf{u}\cdot\mathbf{u}+\mathbf{v}\cdot\mathbf{v}+2\mathbf{u}\cdot\mathbf{v}=\|\mathbf{u}\|^2+\|\mathbf{v}\|^2+2\mathbf{u}\cdot\mathbf{v}$\\
		\vspace{0.5cm}
		$d^2(\mathbf{u},\mathbf{v})=d^2(\mathbf{u},-\mathbf{v}) \Rightarrow -2\mathbf{u}\cdot\mathbf{v}=2\mathbf{u}\cdot\mathbf{v} \Rightarrow \mathbf{u}\cdot\mathbf{v}=0$\\
	\end{center}
\end{exampleblock}

\end{frame}

% ==============================================
\begin{frame}\frametitle{Orthogonality} 
\begin{ceudef}[Orthogonality between two vectors]
	Two vectors, $\mathbf{u}$ and $\mathbf{v}$, in a vector space $V$ are \textbf{orthogonal} iff
	\begin{center}
		$\mathbf{u}\cdot\mathbf{v}=0$
	\end{center}
\end{ceudef}

\begin{block}{Corollary}
	$\mathbf{0}$ is orthogonal to any other vector.
\end{block}

\begin{ceuthm}[Pythagorean theorem]
	Any two vectors, $\mathbf{u}$ and $\mathbf{v}$, in a vector space $V$ are orthogonal iff
	\begin{center}
		$\|\mathbf{u}+\mathbf{v}\|^2=\|\mathbf{u}\|^2+\|\mathbf{v}\|^2$
	\end{center}
	\label{thm:pythagorean}
\end{ceuthm}

\end{frame}

% ==============================================
\begin{frame}\frametitle{Orthogonality} 
\begin{ceudef}[Orthogonality between vector and vector space]
	Let $\mathbf{u}$ be a vector in a vector space $V$ and $W$ a vector subspace of $V$. $\mathbf{u}$ is \textbf{orthogonal} to $W$ if $\mathbf{u}$ is orthogonal
	to all vectors in $W$. The set of all vectors orthogonal to $W$ is denoted as $W^{\perp}$ (the \textbf{orthogonal complement} of $W$).
\end{ceudef}

\begin{exampleblock}{Example}
	Let $W$ be a plane in $\mathbb{R}^3$ passing through the origin and $L$ be a line, passing through the origin and perpendicular to $W$. For any vector $\mathbf{w}\in W$ and any vector 
	$\mathbf{z}\in L$ we have
	\begin{columns}
		\begin{column}{5cm}
			\begin{center}
				$\mathbf{w}\cdot\mathbf{z}=0$
			\end{center}
			Therefore,
			\begin{center}
				$L=W^\perp \Leftrightarrow W=L^\perp$
			\end{center}
		\end{column}
		\begin{column}{5cm}
			\includegraphics[scale=0.5]{figPerpendicular.jpg}
		\end{column}
	\end{columns}
\end{exampleblock}

\end{frame}

% ==============================================
\begin{frame}\frametitle{Orthogonality} 
\begin{ceuthm}
	Let $W$ be a vector subspace of a vector space $V$.
	\begin{enumerate}
		\item $\mathbf{x}\in W^\perp$ iff $\mathbf{x}$ is orthogonal to every vector in a set that spans $W$.
		\item $W^\perp$ is a vector subspace of $V$.
	\end{enumerate}
\end{ceuthm}

\begin{ceuthm}
	\begin{columns}
		\begin{column}{5cm}
			Let $A\in\mathcal{M}_{m\times n}$, then
			\begin{enumerate}
				\item $(\mathrm{Row}\{A\})^\perp=\mathrm{Nul}\{A\}$
				\item $(\mathrm{Col}\{A\})^\perp=\mathrm{Nul}\{A^T\}$
			\end{enumerate}
		\end{column}
		\begin{column}{6cm}
			\begin{center}
				\includegraphics[scale=0.4]{figOrthogonality2.jpg}
			\end{center}
		\end{column}
	\end{columns}
\end{ceuthm}

\end{frame}

% ==============================================
\begin{frame}\frametitle{Orthogonality} 
\begin{block}{}
	\underline{\textit{Proof $\mathrm{Nul}\{A\} \subseteq (\mathrm{Row}\{A\})^\perp$}}\\
	Consider the rows of $A$, $\mathbf{a}_i$ ($i=1,2,...,m$) as column vectors, then for any vector $\mathbf{x}\in\mathrm{Nul}\{A\}$ we know
	\begin{center}
		$A\mathbf{x}=\mathbf{0}\Rightarrow \begin{pmatrix} \mathbf{a}_1^T \\ \mathbf{a}_2^T \\ ... \\ \mathbf{a}_m^T\end{pmatrix}\mathbf{x}=
		   \begin{pmatrix} \mathbf{a}_1^T\mathbf{x} \\ \mathbf{a}_2^T\mathbf{x} \\ ... \\ \mathbf{a}_m^T\mathbf{x}\end{pmatrix}=
			 \begin{pmatrix} \mathbf{a}_1\cdot\mathbf{x} \\ \mathbf{a}_2\cdot\mathbf{x} \\ ... \\ \mathbf{a}_m\cdot\mathbf{x}\end{pmatrix}=
			 \begin{pmatrix} 0 \\ 0 \\ ... \\ 0\end{pmatrix}$
	\end{center}
	Consequently, $\mathbf{x}$ is orthogonal to all the rows of $A$, which span $\mathrm{Row}\{A\}$ and by the previous theorem, $\mathbf{x}\in(\mathrm{Row}\{A\})^\perp$
	
	\underline{\textit{Proof $\mathrm{Nul}\{A\} \supseteq (\mathrm{Row}\{A\})^\perp$}}\\
	Conversely, let $\mathbf{x}\in(\mathrm{Row}\{A\})^\perp$, then by the previous theorem we know that
	\begin{center}
		$\mathbf{a}_i\cdot\mathbf{x}=0 \quad \text{for}\; i=1,2,...,m \Rightarrow A\mathbf{x}=\mathbf{0}$
	\end{center}
	So, $\mathbf{x}\in\mathrm{Nul}\{A\}$

\end{block}
\end{frame}

% ==============================================
\begin{frame}\frametitle{Orthogonality} 
\begin{block}{}
	\underline{\textit{Proof $(\mathrm{Col}\{A\})^\perp=\mathrm{Nul}\{A^T\}$}}\\
	Let's define $B=A^T$. By the first part of this theorem, we know
	\begin{center}
		$(\mathrm{Row}\{B\})^\perp=\mathrm{Nul}\{B\} \Rightarrow 
		(\mathrm{Row}\{A^T\})^\perp=\mathrm{Nul}\{A^T\} \Rightarrow 
		(\mathrm{Col}\{A\})^\perp=\mathrm{Nul}\{A^T\} $
	\end{center}
\end{block}

\begin{ceuthm}
	For any two vectors $\mathbf{u}$ and $\mathbf{v}$ in a vector space $V$, the angle between the two can be measured through
	the dot product:
	\begin{center}
		$\mathbf{u}\cdot\mathbf{v}=\|\mathbf{u}\|\|\mathbf{v}\|\cos\theta$
	\end{center}
\end{ceuthm}

\end{frame}

% ==============================================
\begin{frame}\frametitle{Exercises} 

\begin{exerciseblock}{Exercises}
	From Lay (3rd ed.), Chapter 6, Section 1:
	\begin{itemize}
		\item 6.1.15
		\item 6.1.22
		\item 6.1.24
		\item 6.1.26
		\item 6.1.28
		\item 6.1.30
		\item 6.1.32 (computer)
	\end{itemize}
\end{exerciseblock}

\end{frame}

% ==============================================
\subsection{Orthogonal sets, bases and matrices (a)} 
\Outline

\begin{frame}\frametitle{Orthogonal sets} 
\begin{ceudef}[Orthogonal set]
	Let $S=\{\mathbf{u}_1,\mathbf{u}_2,...,\mathbf{u}_p\}$ be a set of vectors. $S$ is an orthogonal set iff
	\begin{center}
		$\mathbf{u}_i\cdot\mathbf{u}_j=0 \quad \forall i,j\in\{1,2,...,p\}\; i\neq j$
	\end{center}
\end{ceudef}

\begin{exampleblock}{Example}
	Let $\mathbf{u}_1=(3,1,1)$, $\mathbf{u}_2=(-1,2,1)$, $\mathbf{u}_3=(-\frac{1}{2},-2,\frac{7}{2})$. Check whether the set
	$S=\{\mathbf{u}_1,\mathbf{u}_2,\mathbf{u}_3\}$ is orthogonal.\\
	\underline{\textit{Solution}}\\
	\begin{center}
		$\begin{array}{rcl}
			\mathbf{u}_1\cdot\mathbf{u}_2&=&3\cdot(-1)+1\cdot 2 + 1\cdot 1=0\\
			\mathbf{u}_1\cdot\mathbf{u}_3&=&3\cdot(-\frac{1}{2})+1\cdot (-2) + 1\cdot (\frac{7}{2})=0\\
			\mathbf{u}_2\cdot\mathbf{u}_3&=&(-1)\cdot(-\frac{1}{2})+2\cdot (-2) + 1\cdot (\frac{7}{2})=0\\
		\end{array}$
	\end{center}
\end{exampleblock}

\end{frame}

% ==============================================
\begin{frame}\frametitle{Orthogonal sets} 
\begin{ceuthm}
	If $S$ is an orthogonal set of non-null vectors, then $S$ is linearly independent and, consequently, it is a basis of the subspace spanned by $S$.\\
	\underline{\textit{Proof}}\\
	Let $\mathbf{u}_i$ ($i=1,2,...,p$) be the elements of $S$. Let us assume that $S$ is linearly dependent. Then, there exist coefficients $c_1$, $c_2$, ...,
	$c_p$, not all of them null, such that
	\begin{center}
		$\mathbf{0}=c_1\mathbf{u}_1+c_2\mathbf{u}_2+...+c_p\mathbf{u}_p$
	\end{center}
	Now, we compute the inner product with $\mathbf{u}_1$
	\begin{center}
		$\mathbf{0}\cdot\mathbf{u}_1=(c_1\mathbf{u}_1+c_2\mathbf{u}_2+...+c_p\mathbf{u}_p)\cdot\mathbf{u}_1$\\
		$0=c_1(\mathbf{u}_1\cdot\mathbf{u}_1)+c_2(\mathbf{u}_2\cdot\mathbf{u}_1)+...+c_p(\mathbf{u}_p\cdot\mathbf{u}_1)=c_1\|\mathbf{u}_1\|^2\Rightarrow c_1=0$
	\end{center}
	Taking the inner product with $\mathbf{u}_i$ ($i=2,3,...,p$) we can show that all $c_i$'s are 0, which contradicts the assumption; therefore, the set $S$ is linearly independent.
\end{ceuthm}

\end{frame}

% ==============================================
\begin{frame}\frametitle{Orthogonal basis} 
\begin{ceudef}[Orthogonal basis]
	A set of vectors $B$ is an orthogonal basis of a vector space $V$ if it is an orthogonal set and it is a basis of $V$.
\end{ceudef}

\begin{ceuthm}
	Let $\{\mathbf{u}_1,\mathbf{u}_2,...,\mathbf{u}_p\}$ be an orthogonal basis for a vector space $V$. Then, for each $\mathbf{x}\in V$ we have
	\begin{center}
		$\mathbf{x}=\frac{\mathbf{x}\cdot\mathbf{\mathbf{u}_1}}{\|\mathbf{u}_1\|^2}\mathbf{u}_1+
		   \frac{\mathbf{x}\cdot\mathbf{\mathbf{u}_2}}{\|\mathbf{u}_2\|^2}\mathbf{u}_2+...+
			 \frac{\mathbf{x}\cdot\mathbf{\mathbf{u}_p}}{\|\mathbf{u}_p\|^2}\mathbf{u}_p$
	\end{center}
	\underline{\textit{Proof}}\\
	If $\mathbf{x}$ is in $V$, then it can be expressed as a linear combination of the vectors in a basis of $V$
	\begin{center}
		$\mathbf{x}=c_1\mathbf{u}_1+c_2\mathbf{u}_2+...+c_p\mathbf{u}_p$
	\end{center}
	Now, we calculate the dot product with $\mathbf{u}_1$
	\begin{center}
		$\mathbf{x}\cdot\mathbf{u}_1=(c_1\mathbf{u}_1+c_2\mathbf{u}_2+...+c_p\mathbf{u}_p)\cdot\mathbf{u}_1=c_1\|\mathbf{u}_1\|^2 \Rightarrow c_1=\frac{\mathbf{x}\cdot\mathbf{u}_1}{\|\mathbf{u}_1\|^2}$
	\end{center}
\end{ceuthm}

\end{frame}

% ==============================================
\begin{frame}\frametitle{Orthogonal basis} 
\begin{exampleblock}{Example}
	Let $\mathbf{u}_1=(3,1,1)$, $\mathbf{u}_2=(-1,2,1)$, $\mathbf{u}_3=(-\frac{1}{2},-2,\frac{7}{2})$, and $B=\{\mathbf{u}_1,\mathbf{u}_2,\mathbf{u}_3\}$ be
	an orthogonal basis of $\mathbb{R}^3$. Let $\mathbf{x}=(6,1,-8)$. The coordinates of $\mathbf{x}$ in $B$ are given by
	\begin{center}
		$\begin{array}{ccc}
			\mathbf{x}\cdot\mathbf{u}_1 = 11 & \mathbf{x}\cdot\mathbf{u}_2 = -12 & \mathbf{x}\cdot\mathbf{u}_3 = -33 \\
			\|\mathbf{u}_1\|^2 = 11 & \|\mathbf{u}_2\|^2  = 6 & \|\mathbf{u}_3\|^2  = \frac{33}{2} \\
		\end{array}$\\
		\vspace{0.5cm}
		$\begin{array}{rcl}\mathbf{x}&=&\frac{11}{11}\mathbf{u}_1+\frac{-12}{6}\mathbf{u}_2+\frac{-33}{\frac{33}{2}}\mathbf{u}_3\\&=&\mathbf{u}_1-2\mathbf{u}_2-2\mathbf{u}_3\end{array}$
	\end{center}
	The coordinates of $\mathbf{x}$ in the basis $B$ are
	\begin{center}
		$[\mathbf{x}]_B=(1,-2,-2)$
	\end{center}
\end{exampleblock}

\end{frame}
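
% ==============================================
\begin{frame}[fragile]\frametitle{Orthogonal basis}
The coordinates in the example above can be computed directly from the theorem (a minimal NumPy sketch, assuming the same vectors):
\begin{verbatim}
import numpy as np

u1 = np.array([3, 1, 1])
u2 = np.array([-1, 2, 1])
u3 = np.array([-0.5, -2, 3.5])
x  = np.array([6, 1, -8])

# c_i = (x . u_i) / ||u_i||^2 for an orthogonal basis
coords = [np.dot(x, u) / np.dot(u, u)
          for u in (u1, u2, u3)]
print(np.array(coords))   # [ 1. -2. -2.]
\end{verbatim}
\end{frame}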

% ==============================================
\begin{frame}\frametitle{Orthogonal projections} 
\begin{block}{Orthogonal projection onto a vector}
	Consider a vector $\mathbf{y}$ and another one $\mathbf{u}$. Let us assume we want to decompose $\mathbf{y}$ as the sum of two orthogonal
	vectors $\hat{\mathbf{y}}$ (along the direction of $\mathbf{u}$) and another vector $\mathbf{z}$ (orthogonal to $\mathbf{u}$):
	\begin{columns}
		\begin{column}{5cm}
			$\mathbf{y}=\hat{\mathbf{y}}+\mathbf{z}=\alpha\mathbf{u}+\mathbf{z} \Rightarrow$\\
			$\mathbf{z}=\mathbf{y}-\hat{\mathbf{y}}$
		\end{column}
		\begin{column}{5cm}
			\includegraphics[scale=0.35]{figOrthogonalProjection.jpg}
		\end{column}
	\end{columns}
	We need to find $\alpha$ that makes $\mathbf{u}$ and $\mathbf{z}$ orthogonal.
	\begin{center}
	  $0=\mathbf{z}\cdot\mathbf{u}=(\mathbf{y}-\alpha\mathbf{u})\cdot\mathbf{u}=\mathbf{y}\cdot\mathbf{u}-\alpha\|\mathbf{u}\|^2 \Rightarrow \alpha=
		  \frac{\mathbf{y}\cdot\mathbf{u}}{\|\mathbf{u}\|^2}$
	\end{center}
	$\hat{\mathbf{y}}$ is the \textbf{orthogonal projection} of $\mathbf{y}$ onto $\mathbf{u}$.
\end{block}

\end{frame}

% ==============================================
\begin{frame}\frametitle{Orthogonal projections} 
\begin{exampleblock}{Example}
	Let $\mathbf{y}=(7,6)$ and $\mathbf{u}=(4,2)$. Then, 
	\begin{center}
		$\left.\begin{array}{c}
		 \mathbf{y}\cdot\mathbf{u}=40 \\
		 \|\mathbf{u}\|^2=20
		\end{array}\right\}\Rightarrow 
		\begin{array}{c}
		  \hat{\mathbf{y}}=\frac{\mathbf{y}\cdot\mathbf{u}}{\|\mathbf{u}\|^2}\mathbf{u}=\frac{40}{20}\mathbf{u}=2\mathbf{u}=\begin{pmatrix}8\\4\end{pmatrix} \\
			\mathbf{z}=\mathbf{y}-\hat{\mathbf{y}}=\begin{pmatrix}7\\6\end{pmatrix}-\begin{pmatrix}8\\4\end{pmatrix}=\begin{pmatrix}-1\\2\end{pmatrix} \\
			d(\mathbf{y},\hat{\mathbf{y}})=\|\mathbf{y}-\hat{\mathbf{y}}\|=\|\mathbf{z}\|=\sqrt{(-1)^2+2^2}=\sqrt{5}
		\end{array}$
		\includegraphics[scale=0.45]{figOrthogonalProjection2.jpg}
	\end{center}
\end{exampleblock}

\end{frame}
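
% ==============================================
\begin{frame}[fragile]\frametitle{Orthogonal projections}
A numerical version of the previous example (a minimal NumPy sketch; \texttt{y\_hat} and \texttt{z} are our names for $\hat{\mathbf{y}}$ and $\mathbf{z}$):
\begin{verbatim}
import numpy as np

y = np.array([7.0, 6.0])
u = np.array([4.0, 2.0])

alpha = np.dot(y, u) / np.dot(u, u)   # 40/20 = 2.0
y_hat = alpha * u                     # [8. 4.]
z = y - y_hat                         # [-1.  2.]

print(np.dot(z, u))        # 0.0 (z is orthogonal to u)
print(np.linalg.norm(z))   # 2.236... = sqrt(5)
\end{verbatim}
\end{frame}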

% ==============================================
\begin{frame}\frametitle{Orthonormal set} 
\begin{ceudef}[Orthonormal set]
	$\{\mathbf{u}_1,\mathbf{u}_2,...,\mathbf{u}_p\}$ is an \textbf{orthonormal set} if it is an orthogonal set and all $\mathbf{u}_i$ vectors have unit length.
\end{ceudef}
\begin{exampleblock}{Example}
	Show that the set $\{\mathbf{u}_1,\mathbf{u}_2,\mathbf{u}_3\}$ is orthonormal, with
	\begin{center}
		\begin{tabular}{ccc}
		   $\mathbf{u}_1=\frac{1}{\sqrt{11}}\begin{pmatrix}3\\1\\1\end{pmatrix}$ &
		   $\mathbf{u}_2=\frac{1}{\sqrt{6}}\begin{pmatrix}-1\\2\\1\end{pmatrix}$ &
		   $\mathbf{u}_3=\frac{1}{\sqrt{66}}\begin{pmatrix}-1\\-4\\7\end{pmatrix}$
		\end{tabular}
	\end{center}
	\underline{\textit{Solution}}\\
	Let's check that they are orthogonal:
	\begin{center}
		\begin{tabular}{l}
			$\mathbf{u}_1\cdot\mathbf{u}_2=\frac{1}{\sqrt{11}}\frac{1}{\sqrt{6}}(3\cdot(-1)+1\cdot 2+1\cdot 1)=0$ \\
			$\mathbf{u}_1\cdot\mathbf{u}_3=\frac{1}{\sqrt{11}}\frac{1}{\sqrt{66}}(3\cdot(-1)+1\cdot (-4)+1\cdot 7)=0$ \\
			$\mathbf{u}_2\cdot\mathbf{u}_3=\frac{1}{\sqrt{6}}\frac{1}{\sqrt{66}}((-1)\cdot(-1)+(2)\cdot (-4)+(1)\cdot 7)=0$ \\
		\end{tabular}
	\end{center}
\end{exampleblock}

\end{frame}

% ==============================================
\begin{frame}\frametitle{Orthonormal set} 
\begin{exampleblock}{Example (continued)}
	Now, let's check that they have unit length:
	\begin{center}
		\begin{tabular}{l}
			$\|\mathbf{u}_1\|=\sqrt{\left(\frac{1}{\sqrt{11}}\right)^2(3^2+1^2+1^2)}=\sqrt{\frac{9+1+1}{11}}=1$ \\
			$\|\mathbf{u}_2\|=\sqrt{\left(\frac{1}{\sqrt{6}}\right)^2((-1)^2+2^2+1^2)}=\sqrt{\frac{1+4+1}{6}}=1$ \\
			$\|\mathbf{u}_3\|=\sqrt{\left(\frac{1}{\sqrt{66}}\right)^2((-1)^2+(-4)^2+7^2)}=\sqrt{\frac{1+16+49}{66}}=1$ \\
		\end{tabular}
	\end{center}
\end{exampleblock}

\begin{ceuthm}
	If $S=\{\mathbf{u}_1,\mathbf{u}_2,...,\mathbf{u}_n\}$ is an orthonormal set, then it is an orthonormal basis of $\mathrm{Span}\{S\}$.
\end{ceuthm}
\begin{exampleblock}{Example}
	$\{\mathbf{e}_1,\mathbf{e}_2,...,\mathbf{e}_n\}$ is an orthonormal basis of $\mathbb{R}^n$.
\end{exampleblock}

\end{frame}

% ==============================================
\begin{frame}\frametitle{Orthonormal basis} 
\begin{ceuthm}
	Let $S=\{\mathbf{u}_1,\mathbf{u}_2,...,\mathbf{u}_n\}$ be an orthogonal set of non-null vectors. Then the set $S'=\{\mathbf{u}_1',\mathbf{u}_2',...,\mathbf{u}_n'\}$, where
	\begin{center}
		$\mathbf{u}_i'=\frac{\mathbf{u}_i}{\|\mathbf{u}_i\|}$
	\end{center}
	is an orthonormal set (this operation is called \textbf{vector normalization}).\\
	\underline{\textit{Proof}}\\
  Let's check that the $\mathbf{u}_i'$ vectors are orthogonal:
	\begin{center}
		$\mathbf{u}_i'\cdot\mathbf{u}_j'=\frac{\mathbf{u}_i}{\|\mathbf{u}_i\|}\cdot\frac{\mathbf{u}_j}{\|\mathbf{u}_j\|}=
		   \frac{1}{\|\mathbf{u}_i\|\|\mathbf{u}_j\|}\mathbf{u}_i\cdot\mathbf{u}_j$
	\end{center}
	But this product is obviously 0 because the $\mathbf{u}_i$ vectors are orthogonal.
  Let's check now that the $\mathbf{u}_i'$ vectors have unit length:
	\begin{center}
		$\|\mathbf{u}_i'\|=\left\|\frac{\mathbf{u}_i}{\|\mathbf{u}_i\|}\right\|=\frac{\|\mathbf{u}_i\|}{\|\mathbf{u}_i\|}=1$
	\end{center}
\end{ceuthm}

\end{frame}

% ==============================================
\begin{frame}\frametitle{Orthonormal matrix} 
\begin{ceuthm}
	Let $U\in\mathcal{M}_{m\times n}$. The columns of $U$ form an orthonormal set iff
	\begin{center}
		$U^TU=I_n$
	\end{center}
	It is said that $U$ is an \textbf{orthonormal matrix}.\\
	\underline{\textit{Proof}}\\
	Let's consider the columns of $U$
	\begin{center}
		$U=\begin{pmatrix}\mathbf{u}_1&\mathbf{u}_2&...&\mathbf{u}_n\end{pmatrix}$
	\end{center}
	Let's calculate now $U^TU$
	\begin{center}
		$U^TU=\begin{pmatrix}\mathbf{u}_1^T\\\mathbf{u}_2^T\\...\\ \mathbf{u}_n^T\end{pmatrix}\begin{pmatrix}\mathbf{u}_1&\mathbf{u}_2&...&\mathbf{u}_n\end{pmatrix}=
		   \begin{pmatrix}
			   \mathbf{u}_1^T\mathbf{u}_1 & \mathbf{u}_1^T\mathbf{u}_2 & ... & \mathbf{u}_1^T\mathbf{u}_n \\
			   \mathbf{u}_2^T\mathbf{u}_1 & \mathbf{u}_2^T\mathbf{u}_2 & ... & \mathbf{u}_2^T\mathbf{u}_n \\
			   ... & ... & ... & ... \\
			   \mathbf{u}_n^T\mathbf{u}_1 & \mathbf{u}_n^T\mathbf{u}_2 & ... & \mathbf{u}_n^T\mathbf{u}_n \\
			 \end{pmatrix}$
	\end{center}
	The condition $U^TU=I_n$ simply states
	$\left\{\begin{array}{cc}
	   \mathbf{u}_i^T\mathbf{u}_j=0 & i\neq j \\
	   \mathbf{u}_i^T\mathbf{u}_j=1 & i= j \\
	\end{array}\right.$,
	which is the definition of an orthonormal set.
\end{ceuthm}

\end{frame}

% ==============================================
\begin{frame}\frametitle{Orthonormal matrix} 
\begin{ceuthm}
	Let $U\in\mathcal{M}_{m\times n}$ be an orthonormal matrix (i.e., a matrix with orthonormal columns) and let $\mathbf{x},\mathbf{y}\in\mathbb{R}^n$. Then,
	\begin{enumerate}
		\item $\|U\mathbf{x}\|=\|\mathbf{x}\|$
		\item $(U\mathbf{x})\cdot(U\mathbf{y})=\mathbf{x}\cdot\mathbf{y}$
		\item $(U\mathbf{x})\cdot(U\mathbf{y})=0 \Leftrightarrow \mathbf{x}\cdot\mathbf{y}=0$
	\end{enumerate}
\end{ceuthm}
\begin{exampleblock}{Example}
	Let $U=\begin{pmatrix}\frac{1}{\sqrt{2}} & \frac{2}{3}\\\frac{1}{\sqrt{2}} & -\frac{2}{3}\\0&\frac{1}{3}\end{pmatrix}$ and $\mathbf{x}=\begin{pmatrix}\sqrt{2}\\ 3\end{pmatrix}$.\\
	$U$ is an orthonormal matrix because
	\begin{center}
		$U^TU=\begin{pmatrix}\frac{1}{\sqrt{2}} & \frac{1}{\sqrt{2}} & 0 \\ \frac{2}{3} & -\frac{2}{3} & \frac{1}{3}\end{pmatrix}
		      \begin{pmatrix}\frac{1}{\sqrt{2}} & \frac{2}{3}\\\frac{1}{\sqrt{2}} & -\frac{2}{3}\\0&\frac{1}{3}\end{pmatrix}=
					\begin{pmatrix}1 & 0\\ 0 & 1 \end{pmatrix}$
	\end{center}
\end{exampleblock}

\end{frame}

% ==============================================
\begin{frame}\frametitle{Orthonormal matrix} 
\begin{exampleblock}{Example (continued)}
	Let's calculate now  $U\mathbf{x}$
	\begin{center}
		$U\mathbf{x}=\begin{pmatrix}\frac{1}{\sqrt{2}} & \frac{2}{3}\\\frac{1}{\sqrt{2}} & -\frac{2}{3}\\0&\frac{1}{3}\end{pmatrix}
		   \begin{pmatrix}\sqrt{2}\\ 3\end{pmatrix}=\begin{pmatrix}3\\-1\\1\end{pmatrix}$
	\end{center}
	Let's check now that $\|U\mathbf{x}\|=\|\mathbf{x}\|$
	\begin{center}
		$\|U\mathbf{x}\|=\left\|(3,-1,1)\right\|=\sqrt{3^2+(-1)^2+1^2}=\sqrt{11}$\\
		$\|\mathbf{x}\|=\left\|(\sqrt{2},3)\right\|=\sqrt{(\sqrt{2})^2+3^2}=\sqrt{11}$\\
	\end{center}
\end{exampleblock}

\begin{ceuthm}
	Let $U$ be an orthonormal and square matrix. Then,
	\begin{enumerate}
		\item $U^{-1}=U^T$
		\item $U^T$ is also an orthonormal matrix (i.e., the rows of $U$ also form an orthonormal set of vectors).
	\end{enumerate}
\end{ceuthm}

\end{frame}
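
% ==============================================
\begin{frame}[fragile]\frametitle{Orthonormal matrix}
The properties above can be verified numerically for the matrix of the example (a minimal NumPy sketch, not part of Lay's text):
\begin{verbatim}
import numpy as np

s = 1 / np.sqrt(2)
U = np.array([[ s,  2/3],
              [ s, -2/3],
              [ 0,  1/3]])
x = np.array([np.sqrt(2), 3.0])

print(U.T @ U)   # 2x2 identity (up to rounding)
print(U @ x)     # [ 3. -1.  1.]
print(np.linalg.norm(U @ x),
      np.linalg.norm(x))   # both sqrt(11) = 3.3166...
\end{verbatim}
\end{frame}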

% ==============================================
\begin{frame}\frametitle{Exercises} 

\begin{exerciseblock}{Exercises}
	From Lay (3rd ed.), Chapter 6, Section 2:
	\begin{itemize}
		\item 6.2.1
		\item 6.2.10
		\item 6.2.15
		\item 6.2.25
		\item 6.2.26
		\item 6.2.29
		\item 6.2.35 (computer)
	\end{itemize}
\end{exerciseblock}

\end{frame}

% ==============================================
\subsection{Orthogonal projections (b)} 
\Outline

\begin{frame}\frametitle{Orthogonal projections} 
\begin{ceudef}[Orthogonal projection]
	The orthogonal projection of a point $\mathbf{y}$ onto a vector subspace $W$ is a point $\hat{\mathbf{y}} \in W$ such that
	\begin{center}
		$\mathbf{z}=\mathbf{y}-\hat{\mathbf{y}}$\\
		$\mathbf{z} \perp W$\\
		\includegraphics[scale=0.5]{figOrthogonalProjection3.jpg}
	\end{center}
\end{ceudef}

\end{frame}

% ==============================================
\begin{frame}\frametitle{Orthogonal projections} 
\begin{exampleblock}{Example}
	Let $\{\mathbf{u}_1,\mathbf{u}_2,...,\mathbf{u}_5\}$ be an orthogonal basis of $\mathbb{R}^5$. Consider the subspace $W=\mathrm{Span}\{\mathbf{u}_1,\mathbf{u}_2\}$. Given
	any vector $\mathbf{y}\in\mathbb{R}^5$, we can decompose it as the sum of a vector in $W$ and a vector perpendicular to $W$
	\begin{center}
		$\mathbf{y}=\hat{\mathbf{y}}+\mathbf{z}$
	\end{center}
	\underline{\textit{Solution}}\\
	If $\{\mathbf{u}_1,\mathbf{u}_2,...,\mathbf{u}_5\}$ is a basis of $\mathbb{R}^5$, then any vector $\mathbf{y}\in\mathbb{R}^5$ can be written as
	\begin{center}
		$\mathbf{y}=c_1\mathbf{u}_1+c_2\mathbf{u}_2+...+c_5\mathbf{u}_5$
	\end{center}
	We may decompose this sum as
	\begin{center}
		$\hat{\mathbf{y}}=c_1\mathbf{u}_1+c_2\mathbf{u}_2$\\
		$\mathbf{z}=c_3\mathbf{u}_3+c_4\mathbf{u}_4+c_5\mathbf{u}_5$\\
	\end{center}
\end{exampleblock}

\end{frame}

% ==============================================
\begin{frame}\frametitle{Orthogonal projections} 
\begin{exampleblock}{Example (continued)}
	It is obvious that $\hat{\mathbf{y}}\in W$. Now we need to show that $\mathbf{z}\in W^{\perp}$. To do so, we will show that
	\begin{center}
		\begin{tabular}{l}
			$\mathbf{z}\cdot \mathbf{u}_1=0$\\
			$\mathbf{z}\cdot \mathbf{u}_2=0$
		\end{tabular}
	\end{center}
	To show the first equation we note that
	\begin{center}
		$\begin{array}{rcl}\mathbf{z}\cdot \mathbf{u}_1&=&(c_3\mathbf{u}_3+c_4\mathbf{u}_4+c_5\mathbf{u}_5)\cdot \mathbf{u}_1\\
		   &=&c_3(\mathbf{u}_3\cdot \mathbf{u}_1)+c_4(\mathbf{u}_4\cdot \mathbf{u}_1)+c_5(\mathbf{u}_5\cdot \mathbf{u}_1)\\
			 &=&c_3\cdot 0+c_4\cdot 0+c_5\cdot 0\\
			 &=&0
		\end{array}$
	\end{center}
	We would proceed analogously for $\mathbf{z}\cdot \mathbf{u}_2=0$.
\end{exampleblock}

\end{frame}

% ==============================================
\begin{frame}\frametitle{Orthogonal projections} 
\begin{ceuthm}[Orthogonal Decomposition Theorem]
	Let $W$ be a vector subspace of a vector space $V$. Then, any vector $\mathbf{y}\in V$ can be written uniquely as
	\begin{center}
		$\mathbf{y}=\hat{\mathbf{y}}+\mathbf{z}$
	\end{center}
	with $\hat{\mathbf{y}}\in W$ and $\mathbf{z}\in W^{\perp}$. In fact, if $\{\mathbf{u}_1,\mathbf{u}_2,...,\mathbf{u}_p\}$
	is an orthogonal basis of $W$, then
	\begin{center}
		$\hat{\mathbf{y}}=\frac{\mathbf{y}\cdot\mathbf{u}_1}{\|\mathbf{u}_1\|^2}\mathbf{u}_1+
		                  \frac{\mathbf{y}\cdot\mathbf{u}_2}{\|\mathbf{u}_2\|^2}\mathbf{u}_2+
											...+
											\frac{\mathbf{y}\cdot\mathbf{u}_p}{\|\mathbf{u}_p\|^2}\mathbf{u}_p$\\
		\includegraphics[scale=0.5]{figOrthogonalProjection4.jpg}
	\end{center}
	\label{thm:orthogonalDecomposition}
\end{ceuthm}

\end{frame}

% ==============================================
\begin{frame}\frametitle{Orthogonal projections} 
\begin{block}{}
	\underline{\textit{Proof}}\\
	$\hat{\mathbf{y}}$ is obviously in $W$ since it has been written as a linear combination of vectors in a basis of $W$. $\mathbf{z}$ is perpendicular to $W$ because
	\begin{center}
		$\begin{array}{rcl}\mathbf{z}\cdot\mathbf{u}_1&=&\left(\mathbf{y}-\left(\frac{\mathbf{y}\cdot\mathbf{u}_1}{\|\mathbf{u}_1\|^2}\mathbf{u}_1+
		                  \frac{\mathbf{y}\cdot\mathbf{u}_2}{\|\mathbf{u}_2\|^2}\mathbf{u}_2+
											...+
											\frac{\mathbf{y}\cdot\mathbf{u}_p}{\|\mathbf{u}_p\|^2}\mathbf{u}_p\right)\right)\cdot\mathbf{u}_1\\
							   &=&\mathbf{y}\cdot\mathbf{u}_1
								    -\frac{\mathbf{y}\cdot\mathbf{u}_1}{\|\mathbf{u}_1\|^2}(\mathbf{u}_1\cdot \mathbf{u}_1)
										-\frac{\mathbf{y}\cdot\mathbf{u}_2}{\|\mathbf{u}_2\|^2}(\mathbf{u}_2\cdot \mathbf{u}_1)
										-...
										-\frac{\mathbf{y}\cdot\mathbf{u}_p}{\|\mathbf{u}_p\|^2}(\mathbf{u}_p\cdot \mathbf{u}_1)\\
								 & &[\{\mathbf{u}_i\}\text{ is an orthogonal set}]\\
							   &=&\mathbf{y}\cdot\mathbf{u}_1
								    -\frac{\mathbf{y}\cdot\mathbf{u}_1}{\|\mathbf{u}_1\|^2}(\mathbf{u}_1\cdot \mathbf{u}_1) \\
							   &=&\mathbf{y}\cdot\mathbf{u}_1
								    -\frac{\mathbf{y}\cdot\mathbf{u}_1}{\|\mathbf{u}_1\|^2}\|\mathbf{u}_1\|^2 \\
							   &=&\mathbf{y}\cdot\mathbf{u}_1-\mathbf{y}\cdot\mathbf{u}_1\\
							   &=&0\\
		\end{array}$
	\end{center}
	We could proceed analogously for all elements in the basis of $W$.
\end{block}

\end{frame}

% ==============================================
\begin{frame}\frametitle{Orthogonal projections} 
\begin{block}{}
	We need to show now that the decomposition is unique. Let us assume that it is not unique. Consequently, there exist different vectors such that
	\begin{center}
		$\mathbf{y}=\hat{\mathbf{y}}+\mathbf{z}$ \\
		$\mathbf{y}=\hat{\mathbf{y}}'+\mathbf{z}'$
	\end{center}
	We subtract both equations
	\begin{center}
		$\mathbf{0}=(\hat{\mathbf{y}}-\hat{\mathbf{y}}')+(\mathbf{z}-\mathbf{z}') \Rightarrow \hat{\mathbf{y}}-\hat{\mathbf{y}}'=\mathbf{z}'-\mathbf{z}$ 
	\end{center}
	Let $\mathbf{v}=\hat{\mathbf{y}}-\hat{\mathbf{y}}'$. It is obvious that $\mathbf{v}\in W$ because it is written as a linear combination of vectors in $W$. On the
	other hand, $\mathbf{v}=\mathbf{z}'-\mathbf{z}$, i.e., it is a linear combination of vectors in $W^\perp$, so $\mathbf{v}\in W^\perp$. The only vector that
	belongs to $W$ and $W^\perp$ at the same time is
	\begin{center}
		$\mathbf{v}=\mathbf{0}\Rightarrow \left\{\begin{array}{c}\hat{\mathbf{y}}=\hat{\mathbf{y}}'\\ \mathbf{z}=\mathbf{z}'\end{array}\right.$.
	\end{center}
	and consequently, the orthogonal decomposition is unique. Additionally, the decomposition itself depends only on $W$ and not on the particular
	basis chosen for $W$.
\end{block}

\end{frame}

% ==============================================
\begin{frame}\frametitle{Orthogonal projections} 
\begin{exampleblock}{Example}
	Let $\mathbf{u}_1=(2,5,-1)$ and $\mathbf{u}_2=(-2,1,1)$. Let $W$ be the subspace spanned by $\mathbf{u}_1$ and $\mathbf{u}_2$. Let $\mathbf{y}=(1,2,3)\in\mathbb{R}^3$. The 
	orthogonal projection of $\mathbf{y}$ onto $W$ is
	\begin{center}
		$\begin{array}{rcl}\hat{\mathbf{y}}&=&\frac{\mathbf{y}\cdot\mathbf{u}_1}{\|\mathbf{u}_1\|^2}\mathbf{u}_1+
		                  \frac{\mathbf{y}\cdot\mathbf{u}_2}{\|\mathbf{u}_2\|^2}\mathbf{u}_2\\
										&=&\frac{1\cdot 2+2\cdot 5 + 3\cdot (-1)}{2^2+5^2+(-1)^2}\begin{pmatrix}2\\5\\-1\end{pmatrix}+
										   \frac{1\cdot(-2)+2\cdot 1 + 3\cdot 1}{(-2)^2+1^2+1^2}\begin{pmatrix}-2\\1\\1\end{pmatrix}\\
										&=&\frac{9}{30}\begin{pmatrix}2\\5\\-1\end{pmatrix}+
										   \frac{3}{6}\begin{pmatrix}-2\\1\\1\end{pmatrix}=
											\begin{pmatrix}-\frac{2}{5}\\2\\ \frac{1}{5}\end{pmatrix}\\
		\mathbf{z}&=&\mathbf{y}-\hat{\mathbf{y}}=\begin{pmatrix}1\\2\\3\end{pmatrix}-\begin{pmatrix}-\frac{2}{5}\\2\\ \frac{1}{5}\end{pmatrix}=
		   \begin{pmatrix}\frac{7}{5}\\0\\ \frac{14}{5}\end{pmatrix}
		\end{array}$
	\end{center}
\end{exampleblock}

\end{frame}
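
% ==============================================
\begin{frame}[fragile]\frametitle{Orthogonal projections}
The projection of the previous example can be checked numerically (a minimal NumPy sketch applying the decomposition theorem to the same vectors):
\begin{verbatim}
import numpy as np

u1 = np.array([2.0, 5.0, -1.0])
u2 = np.array([-2.0, 1.0, 1.0])
y  = np.array([1.0, 2.0, 3.0])

y_hat = (np.dot(y, u1) / np.dot(u1, u1)) * u1 \
      + (np.dot(y, u2) / np.dot(u2, u2)) * u2
z = y - y_hat

print(y_hat)   # [-0.4  2.   0.2]
print(z)       # [ 1.4  0.   2.8]
print(np.dot(z, u1), np.dot(z, u2))   # 0.0 0.0
\end{verbatim}
\end{frame}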

% ==============================================
\begin{frame}\frametitle{Orthogonal projections} 
\begin{block}{Geometrical interpretation}
	$\hat{\mathbf{y}}$ can be understood as the sum of the orthogonal projection of $\mathbf{y}$ onto each one of the elements of the basis of $W$.
	\begin{center}
		\includegraphics[scale=0.35]{figOrthogonalProjection5.jpg}
	\end{center}
\end{block}

\begin{ceuthm}
	If $\mathbf{y}$ belongs to $W$, then the orthogonal projection of $\mathbf{y}$ onto $W$ is itself:
	\begin{center}
		$\hat{\mathbf{y}}=\mathbf{y}$
	\end{center}
\end{ceuthm}

\end{frame}

% ==============================================
\begin{frame}\frametitle{Properties of orthogonal projections} 
\begin{ceuthm}[Best approximation theorem]
	The orthogonal projection of $\mathbf{y}$ onto $W$ is the point in $W$ with minimum distance to $\mathbf{y}$, i.e.,
	\begin{center}
		$\|\mathbf{y}-\hat{\mathbf{y}}\|\le\|\mathbf{y}-\mathbf{v}\|$
	\end{center}
	for all $\mathbf{v}\in W, \mathbf{v}\neq\hat{\mathbf{y}}$.\\
	\underline{\textit{Proof}}\\
	We know that $\mathbf{y}-\hat{\mathbf{y}}$ is orthogonal to $W$. For any vector $\mathbf{v}\in W, \mathbf{v}\neq\hat{\mathbf{y}}$,
	we have that $\hat{\mathbf{y}}-\mathbf{v}$ is in $W$. Now consider the orthogonal decomposition of the vector $\mathbf{y}-\mathbf{v}$
	\begin{columns}
		\begin{column}{4cm}
			$\mathbf{y}-\mathbf{v}=(\mathbf{y}-\hat{\mathbf{y}})+(\hat{\mathbf{y}}-\mathbf{v})$
		\end{column}
		\begin{column}{7cm}
			\includegraphics[scale=0.425]{figMinDistance.jpg}
		\end{column}
	\end{columns}
	\label{thm:bestApproximation}
\end{ceuthm}
\end{frame}

% ==============================================
\begin{frame}\frametitle{Properties of orthogonal projections} 
\begin{block}{}
	Due to the orthogonal decomposition theorem (Theorem \ref{thm:orthogonalDecomposition}), this decomposition is unique and due to the
	Pythagorean theorem (Theorem \ref{thm:pythagorean}) we have
	\begin{center}
		$\|\mathbf{y}-\mathbf{v}\|^2=\|\mathbf{y}-\hat{\mathbf{y}}\|^2+\|\hat{\mathbf{y}}-\mathbf{v}\|^2$
	\end{center}
	Since $\mathbf{v}\neq \hat{\mathbf{y}}$ we have $\|\hat{\mathbf{y}}-\mathbf{v}\|^2>0$ and consequently
	\begin{center}
		$\|\mathbf{y}-\mathbf{v}\|^2>\|\mathbf{y}-\hat{\mathbf{y}}\|^2$
	\end{center}
	
\end{block}

\end{frame}

% ==============================================
\begin{frame}\frametitle{Properties of orthogonal projections} 
\begin{ceuthm}
	If $\{\mathbf{u}_1,\mathbf{u}_2,...,\mathbf{u}_p\}$ is an orthonormal basis of $W$, then the orthogonal projection of $\mathbf{y}$ onto $W$ is 
	\begin{center}
		$\hat{\mathbf{y}}=\left<\mathbf{y},\mathbf{u}_1\right>\mathbf{u}_1+\left<\mathbf{y},\mathbf{u}_2\right>\mathbf{u}_2+...+\left<\mathbf{y},\mathbf{u}_p\right>\mathbf{u}_p$
	\end{center}
	If we construct the orthonormal matrix $U=\begin{pmatrix}\mathbf{u}_1 & \mathbf{u}_2 &...&\mathbf{u}_p\end{pmatrix}$, then
	\begin{center}
		$\hat{\mathbf{y}}=UU^T\mathbf{y}$
	\end{center}
	\underline{\textit{Proof}}\\
	By Theorem \ref{thm:orthogonalDecomposition} we know that for all orthogonal bases it is verified
	\begin{center}
		$\hat{\mathbf{y}}=\frac{\mathbf{y}\cdot\mathbf{u}_1}{\|\mathbf{u}_1\|^2}\mathbf{u}_1+
		                  \frac{\mathbf{y}\cdot\mathbf{u}_2}{\|\mathbf{u}_2\|^2}\mathbf{u}_2+
											...+
											\frac{\mathbf{y}\cdot\mathbf{u}_p}{\|\mathbf{u}_p\|^2}\mathbf{u}_p$\\
	\end{center}
	Since the basis is in this case orthonormal, then $\|\mathbf{u}\|=1$ and consequently
	\begin{center}
		$\hat{\mathbf{y}}=\left<\mathbf{y},\mathbf{u}_1\right>\mathbf{u}_1+\left<\mathbf{y},\mathbf{u}_2\right>\mathbf{u}_2+...+\left<\mathbf{y},\mathbf{u}_p\right>\mathbf{u}_p$
	\end{center}
\end{ceuthm}
\end{frame}

% ==============================================
\begin{frame}\frametitle{Properties of orthogonal projections} 
\begin{block}{}
	On the other hand, we have
	\begin{center}
		$U^T\mathbf{y}=\begin{pmatrix}\mathbf{u}_1^T\\ \mathbf{u}_2^T \\ ... \\ \mathbf{u}_p^T\end{pmatrix}\mathbf{y}
		    =\begin{pmatrix}\mathbf{u}_1^T\mathbf{y}\\ \mathbf{u}_2^T\mathbf{y} \\ ... \\ \mathbf{u}_p^T\mathbf{y}\end{pmatrix}
				=\begin{pmatrix}\left<\mathbf{u}_1,\mathbf{y}\right>\\ \left<\mathbf{u}_2,\mathbf{y}\right> \\ ... \\ \left<\mathbf{u}_p,\mathbf{y}\right>\end{pmatrix}$
	\end{center}
	Then,
	\begin{center}
		$UU^T\mathbf{y}=\begin{pmatrix}\mathbf{u}_1 & \mathbf{u}_2 & ... & \mathbf{u}_p\end{pmatrix}
		    \begin{pmatrix}\left<\mathbf{u}_1,\mathbf{y}\right>\\ \left<\mathbf{u}_2,\mathbf{y}\right> \\ ... \\ \left<\mathbf{u}_p,\mathbf{y}\right>\end{pmatrix}
				=\left<\mathbf{y},\mathbf{u}_1\right>\mathbf{u}_1+\left<\mathbf{y},\mathbf{u}_2\right>\mathbf{u}_2+...+\left<\mathbf{y},\mathbf{u}_p\right>\mathbf{u}_p$
	\end{center}
	(q.e.d.)
\end{block}

\end{frame}

% ==============================================
\begin{frame}\frametitle{Properties of orthogonal projections} 
\begin{block}{Corollary}
	Let $U=\begin{pmatrix}\mathbf{u}_1 & \mathbf{u}_2 &...&\mathbf{u}_p\end{pmatrix}$ be an $n\times p$ matrix with orthonormal columns and $W=\mathrm{Col}\{U\}$ its column
	space. Then,
	\begin{center}
		\begin{tabular}{cl}
			$\forall\mathbf{x}\in\mathbb{R}^p\quad U^TU\mathbf{x}=\mathbf{x}$ & No effect \\
			$\forall\mathbf{y}\in\mathbb{R}^n\quad UU^T\mathbf{y}=\hat{\mathbf{y}}$ & Orthogonal projection of $\mathbf{y}$ onto $W$\\
		\end{tabular}
	\end{center}
	If $U$ is an $n\times n$ matrix, then $W=\mathbb{R}^n$ and the projection has no effect
	\begin{center}
		\begin{tabular}{cl}
			$\forall\mathbf{y}\in\mathbb{R}^n\quad UU^T\mathbf{y}=\hat{\mathbf{y}}=\mathbf{y}$ & No effect\\
		\end{tabular}
	\end{center}
	\label{cor:U}
\end{block}

\end{frame}
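
% ==============================================
\begin{frame}[fragile]\frametitle{Properties of orthogonal projections}
A numerical illustration of the corollary (a minimal NumPy sketch; we normalize the two basis vectors of the previous projection example to build $U$, and $\mathbf{x}$ is an arbitrary test vector of ours):
\begin{verbatim}
import numpy as np

u1 = np.array([2.0, 5.0, -1.0])
u2 = np.array([-2.0, 1.0, 1.0])
U = np.column_stack([u1 / np.linalg.norm(u1),
                     u2 / np.linalg.norm(u2)])
y = np.array([1.0, 2.0, 3.0])
x = np.array([0.7, -1.3])

print(U.T @ U @ x)   # [ 0.7 -1.3]  (no effect)
print(U @ U.T @ y)   # [-0.4  2.   0.2] (projection onto W)
\end{verbatim}
\end{frame}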

% ==============================================
\begin{frame}\frametitle{Exercises} 

\begin{exerciseblock}{Exercises}
	From Lay (3rd ed.), Chapter 6, Section 3:
	\begin{itemize}
		\item 6.3.1
		\item 6.3.7
		\item 6.3.15
		\item 6.3.23
		\item 6.3.24
		\item 6.3.25 (computer)
	\end{itemize}
\end{exerciseblock}

\end{frame}

% ==============================================
\subsection{Gram-Schmidt orthogonalization (b)} 
\Outline

\begin{frame}\frametitle{Gram-Schmidt orthogonalization} 
Gram-Schmidt orthogonalization is a procedure aimed at producing an orthogonal basis of any subspace $W$.

\begin{exampleblock}{Example}
	Let $W=\mathrm{Span}\{\mathbf{x}_1,\mathbf{x}_2\}$ with $\mathbf{x}_1=(3,6,0)$ and $\mathbf{x}_2=(1,2,2)$. Let's look for an orthogonal basis of $W$.\\
	\underline{\textit{Solution}}\\
	We may keep the first vector for the basis
	\begin{center}
		$\mathbf{v}_1=\mathbf{x}_1=(3,6,0)$
	\end{center}
	For the second vector in the basis, we need to keep the component of $\mathbf{x}_2$ that is orthogonal to $\mathbf{x}_1$. To do so, we calculate the projection of
	$\mathbf{x}_2$ onto $\mathbf{x}_1$ (call it $\mathbf{p}$), and we decompose $\mathbf{x}_2$ as
	\begin{center}
		$\mathbf{x}_2=\mathbf{p}+(\mathbf{x}_2-\mathbf{p})=(1,2,0)+(0,0,2)$
	\end{center}
	We, then, keep the orthogonal part of $\mathbf{x}_2$
	\begin{center}
		$\mathbf{v}_2=\mathbf{x}_2-\mathbf{p}=(0,0,2)$
	\end{center}
	\label{ex:GramSchmidt}
\end{exampleblock}

\end{frame}

% ==============================================
\begin{frame}\frametitle{Gram-Schmidt orthogonalization} 
\begin{exampleblock}{Example (continued)}
  The set $\{\mathbf{v}_1,\mathbf{v}_2\}$ is an orthogonal basis of $W$.
	\begin{center}
		\includegraphics[scale=0.4]{figGramSchmidt.jpg}
	\end{center}
\end{exampleblock}

\end{frame}

% ==============================================
\begin{frame}\frametitle{Gram-Schmidt orthogonalization} 
\begin{exampleblock}{Example}
	Let $W=\mathrm{Span}\{\mathbf{x}_1,\mathbf{x}_2,\mathbf{x}_3\}$ with $\mathbf{x}_1=(1,1,1,1)$, $\mathbf{x}_2=(0,1,1,1)$ and $\mathbf{x}_3=(0,0,1,1)$. Let's look for an orthogonal basis of $W$.\\
	\underline{\textit{Solution}}\\
	We may keep the first vector for the basis. Then we construct a subspace ($W_1$) with a single element in its basis
	\begin{center}
		\begin{tabular}{cc}
			$\mathbf{v}_1=\mathbf{x}_1=(1,1,1,1)$ & $W_1=\mathrm{Span}\{\mathbf{v}_1\}$
		\end{tabular}
	\end{center}
	For the second vector in the basis, we need to keep the component of $\mathbf{x}_2$ that is orthogonal to $W_1$. With the already computed basis vectors, we
	construct a new subspace ($W_2$) with two elements in its basis
	\begin{center}
		\begin{tabular}{cc}
			$\mathbf{v}_2=\mathbf{x}_2-\mathrm{Proj}_{W_1}(\mathbf{x}_2)=(-\frac{3}{4},\frac{1}{4},\frac{1}{4},\frac{1}{4})$ & $W_2=\mathrm{Span}\{\mathbf{v}_1,\mathbf{v}_2\}$
		\end{tabular}
	\end{center}
	For the third vector in the basis, we repeat the same procedure
	\begin{center}
		\begin{tabular}{cc}
			$\mathbf{v}_3=\mathbf{x}_3-\mathrm{Proj}_{W_2}(\mathbf{x}_3)=(0,-\frac{2}{3},\frac{1}{3},\frac{1}{3})$ & $W_3=\mathrm{Span}\{\mathbf{v}_1,\mathbf{v}_2,\mathbf{v}_3\}$
		\end{tabular}
	\end{center}
	\label{ex:GramSchmidt2}
\end{exampleblock}

\end{frame}

% ==============================================
\begin{frame}\frametitle{Gram-Schmidt orthogonalization} 
\begin{ceuthm}[Gram-Schmidt orthogonalization]
	Given a basis $\{\mathbf{x}_1,\mathbf{x}_2,...,\mathbf{x}_p\}$ for a vector subspace $W$. Define
	\begin{center}
		\begin{tabular}{cl}
			$\mathbf{v}_1=\mathbf{x}_1$ & $W_1=\mathrm{Span}\{\mathbf{v}_1\}$ \\
			$\mathbf{v}_2=\mathbf{x}_2-\mathrm{Proj}_{W_1}(\mathbf{x}_2)$ & $W_2=\mathrm{Span}\{\mathbf{v}_1,\mathbf{v}_2\}$ \\
			... & \\
			$\mathbf{v}_p=\mathbf{x}_p-\mathrm{Proj}_{W_{p-1}}(\mathbf{x}_p)$ & $W_p=\mathrm{Span}\{\mathbf{v}_1,\mathbf{v}_2,...,\mathbf{v}_p\}=W$
		\end{tabular}
	\end{center}
	Then $\{\mathbf{v}_1,\mathbf{v}_2,...,\mathbf{v}_p\}$ is an orthogonal basis of $W$.\\
\end{ceuthm}

\end{frame}

% ==============================================
\begin{frame}\frametitle{Gram-Schmidt orthogonalization}
\begin{block}{} 
	\underline{\textit{Proof}}\\
	Consider $W_k=\mathrm{Span}\{\mathbf{v}_1,\mathbf{v}_2,...,\mathbf{v}_k\}$ and let us assume that $\{\mathbf{v}_1,\mathbf{v}_2,...,\mathbf{v}_k\}$ is a basis of $W_k$.
	Now we construct
	\begin{center}
		\begin{tabular}{cl}
			$\mathbf{v}_{k+1}=\mathbf{x}_{k+1}-\mathrm{Proj}_{W_k}(\mathbf{x}_{k+1})$ & $W_{k+1}=\mathrm{Span}\{\mathbf{v}_1,\mathbf{v}_2,...,\mathbf{v}_{k+1}\}$ \\
		\end{tabular}
	\end{center}
	By the orthogonal decomposition theorem (Theorem \ref{thm:orthogonalDecomposition}), we know that $\mathbf{v}_{k+1}$ is orthogonal to $W_k$. Because $\mathbf{x}_{k+1}$ is
	an element of a basis, we know that $\mathbf{x}_{k+1}\notin W_k$. Therefore, $\mathbf{v}_{k+1}$ is not null and $\mathbf{x}_{k+1}\in W_{k+1}$. Finally, the set
	$\{\mathbf{v}_1,\mathbf{v}_2,...,\mathbf{v}_{k+1}\}$ is a set of orthogonal, non-null vectors in the $(k+1)$-dimensional space $W_{k+1}$. Consequently, by Theorem 9.4 in Chapter 5, 
	it must be a basis of $W_{k+1}$. This process can be iterated till $k=p$.
\end{block}

\end{frame}

% ==============================================
\begin{frame}\frametitle{Gram-Schmidt orthogonalization}
\begin{block}{Orthonormal basis} 
	Once we have an orthogonal basis, we simply have to normalize each vector to have an orthonormal basis.
\end{block}

\begin{exampleblock}{Example} 
	Let $W=\mathrm{Span}\{\mathbf{x}_1,\mathbf{x}_2\}$ with $\mathbf{x}_1=(3,6,0)$ and $\mathbf{x}_2=(1,2,2)$. Let's look for an orthonormal basis of $W$.\\
	\underline{\textit{Solution}}\\
	In Slide \pageref{ex:GramSchmidt} we learned that an orthogonal basis was given by 
	\begin{center}
		$\mathbf{v}_1=(3,6,0)$\\
		$\mathbf{v}_2=(0,0,2)$
	\end{center}
	Now, we normalize these two vectors to obtain an orthonormal basis
	\begin{center}
		$\mathbf{v}_1'=\frac{\mathbf{v}_1}{\|\mathbf{v}_1\|}=\frac{1}{\sqrt{45}}(3,6,0)=(\frac{1}{\sqrt{5}},\frac{2}{\sqrt{5}},0)$\\
		$\mathbf{v}_2'=\frac{\mathbf{v}_2}{\|\mathbf{v}_2\|}=\frac{1}{2}(0,0,2)=(0,0,1)$\\
	\end{center}
	
\end{exampleblock}

\end{frame}
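
% ==============================================
\begin{frame}[fragile]\frametitle{Gram-Schmidt orthogonalization}
The procedure can be written as a short function (a minimal NumPy sketch of the algorithm above; it assumes the input vectors are linearly independent):
\begin{verbatim}
import numpy as np

def gram_schmidt(xs):
    """Return an orthogonal basis of Span{xs}."""
    vs = []
    for x in xs:
        # subtract the projection of x onto W_k = Span{vs}
        proj = sum((np.dot(x, v) / np.dot(v, v)) * v
                   for v in vs)
        vs.append(x - proj)
    return vs

xs = [np.array([1.0, 1, 1, 1]),
      np.array([0.0, 1, 1, 1]),
      np.array([0.0, 0, 1, 1])]
for v in gram_schmidt(xs):
    print(v)
# [1. 1. 1. 1.]
# [-0.75  0.25  0.25  0.25]
# [ 0. -0.667  0.333  0.333]  (rounded)
\end{verbatim}
\end{frame}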

% ==============================================
\begin{frame}\frametitle{QR factorization of matrices}
If we apply Gram-Schmidt orthogonalization to the columns of a matrix, we obtain the following factorization scheme. This factorization is used in practice
to find eigenvalues and eigenvectors as well as to solve linear equation systems.
\begin{ceuthm}[QR factorization]
	Let $A\in\mathcal{M}_{m\times n}$ with linearly independent columns. Then, $A$ can be factored as
	\begin{center}
		$A=QR$
	\end{center}
	where $Q\in\mathcal{M}_{m\times n}$ 
	is a matrix whose columns form an orthonormal basis of $\mathrm{Col}\{A\}$ and
	$R \in\mathcal{M}_{n\times n}$ is an upper triangular 
	invertible matrix with positive entries on its diagonal.\\
	\underline{\textit{Proof}}\\
	Let's orthogonalize the columns of $A$ following the Gram-Schmidt procedure and construct the orthonormal basis of $\mathrm{Col}\{A\}$. Let $\{\mathbf{u}_1,\mathbf{u}_2, ...,
	\mathbf{u}_n\}$ be such a basis. Let us construct the matrix
	\begin{center}
		$Q=\begin{pmatrix}\mathbf{u}_1 & \mathbf{u}_2 & ... & \mathbf{u}_n\end{pmatrix}$
	\end{center}
	\label{thm:qr}
\end{ceuthm}

\end{frame}

% ==============================================
\begin{frame}\frametitle{QR factorization of matrices}
\begin{block}{}
	Let us denote by $\mathbf{a}_i$ ($i=1,2,...,n$) the columns of $A$. By the Gram-Schmidt orthogonalization, we know that for any $k$ between 1 and $n$ we have
	\begin{center}
		$\mathrm{Span}\{\mathbf{a}_1,\mathbf{a}_2, ...,	\mathbf{a}_k\}=\mathrm{Span}\{\mathbf{u}_1,\mathbf{u}_2, ...,	\mathbf{u}_k\}$
	\end{center}
	Consequently, we can express each column of $A$ in the orthonormal basis:
	\begin{center}
		$\mathbf{a}_k=r_{1k}\mathbf{u}_1+r_{2k}\mathbf{u}_2+ ...+r_{kk}\mathbf{u}_k+0\cdot\mathbf{u}_{k+1}+...+0\cdot\mathbf{u}_n$
	\end{center}
	If $r_{kk}$ is negative, we can multiply both $r_{kk}$ and $\mathbf{u}_k$ by -1. 
	We now collect all these coefficients in a vector $\mathbf{r}_k=(r_{1k},r_{2k},...,r_{kk},0,0,...,0)$ to have
	\begin{center}
		$\mathbf{a}_k=Q\mathbf{r}_k$
	\end{center}
	By gathering all these vectors in a matrix, we have the triangular matrix $R$
	\begin{center}
		$R=\begin{pmatrix}\mathbf{r}_1 & \mathbf{r}_2 & ... & \mathbf{r}_n\end{pmatrix}$
	\end{center}
	$R$ is invertible because the columns of $A$ are linearly independent.
\end{block}

\end{frame}

% ==============================================
\begin{frame}\frametitle{QR factorization of matrices}
\begin{exampleblock}{Example}
	Let's calculate the QR factorization of $A=\begin{pmatrix}1 & 0 & 0 \\ 1 & 1 & 0 \\ 1 & 1 & 1 \\ 1 & 1 & 1 \end{pmatrix}$. From Slide \pageref{ex:GramSchmidt2} we know that
	the vectors
	\begin{center}
			$\mathbf{v}_1=(1,1,1,1)$ \\
			$\mathbf{v}_2=(-\frac{3}{4},\frac{1}{4},\frac{1}{4},\frac{1}{4})$ \\
			$\mathbf{v}_3=(0,-\frac{2}{3},\frac{1}{3},\frac{1}{3})$ \\
	\end{center}
	form an orthogonal basis of the column space of $A$. We now normalize these vectors to obtain the orthonormal basis in $Q$
	\begin{center}
		$Q=\begin{pmatrix}
		     \frac{1}{2} & -\frac{3}{\sqrt{12}} & 0 \\
		     \frac{1}{2} & \frac{1}{\sqrt{12}} & -\frac{2}{\sqrt{6}} \\
		     \frac{1}{2} & \frac{1}{\sqrt{12}} & \frac{1}{\sqrt{6}} \\
		     \frac{1}{2} & \frac{1}{\sqrt{12}} & \frac{1}{\sqrt{6}} \\
		   \end{pmatrix}$
	\end{center}
\end{exampleblock}

\end{frame}

% ==============================================
\begin{frame}\frametitle{QR factorization of matrices}
\begin{exampleblock}{Example (continued)}
	To find $R$ we multiply on both sides of the factorization by $Q^T$
	\begin{center}
		$A=QR \Rightarrow Q^TA=Q^TQR=R$ \\
		$\begin{array}{rcl}R=Q^TA&=&\begin{pmatrix}\frac{1}{2} &\frac{1}{2} &\frac{1}{2} &\frac{1}{2} \\
		    -\frac{3}{\sqrt{12}} &\frac{1}{\sqrt{12}} &\frac{1}{\sqrt{12}} &\frac{1}{\sqrt{12}} \\
				0 & -\frac{2}{\sqrt{6}} & \frac{1}{\sqrt{6}} & \frac{1}{\sqrt{6}} \end{pmatrix}
				\begin{pmatrix}1 & 0 & 0 \\ 1 & 1 & 0 \\ 1 & 1 & 1 \\ 1 & 1 & 1 \end{pmatrix}\\
				&=&\begin{pmatrix}2 & \frac{3}{2} & 1 \\ 0 & \frac{3}{\sqrt{12}} & \frac{2}{\sqrt{12}} \\ 0 & 0 & \frac{2}{\sqrt{6}}\end{pmatrix}\end{array}$
	\end{center}
\end{exampleblock}

\end{frame}
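
% ==============================================
\begin{frame}[fragile]\frametitle{QR factorization of matrices}
We can cross-check the factorization with a library routine (a minimal NumPy sketch; note that \texttt{np.linalg.qr} may return some columns of $Q$, and the corresponding rows of $R$, with opposite signs, which is an equally valid factorization up to the sign convention on the diagonal of $R$):
\begin{verbatim}
import numpy as np

A = np.array([[1.0, 0, 0],
              [1.0, 1, 0],
              [1.0, 1, 1],
              [1.0, 1, 1]])

Q, R = np.linalg.qr(A)
print(R)            # upper triangular, compare with the slide
print(Q @ R - A)    # (numerically) zero matrix
print(Q.T @ Q)      # 3x3 identity
\end{verbatim}
\end{frame}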

% ==============================================
\begin{frame}\frametitle{Exercises} 

\begin{exerciseblock}{Exercises}
	From Lay (3rd ed.), Chapter 6, Section 4:
	\begin{itemize}
		\item 6.4.7
		\item 6.4.13
		\item 6.4.19
		\item 6.4.22
		\item 6.4.24
	\end{itemize}
\end{exerciseblock}

\end{frame}

% ==============================================
\subsection{Least squares (c)} 
\Outline

\begin{frame}\frametitle{Least squares}

Let's assume we want to solve the equation system $A\mathbf{x}=\mathbf{b}$ but, due to noise, it has no exact solution. We may still look for an approximate solution such that $A\mathbf{x}\approx\mathbf{b}$. In fact, the goal will be to minimize $d(A\mathbf{x},\mathbf{b})$.

\begin{ceudef}[Least squares solution]
	Let $A$ be an $m\times n$ matrix and $\mathbf{b}\in\mathbb{R}^m$. $\hat{\mathbf{x}}\in\mathbb{R}^n$ is a \textbf{least squares solution} of the equation system $A\mathbf{x}=\mathbf{b}$
	iff
	\begin{center}
		$\forall\mathbf{x}\in\mathbb{R}^n\quad \|\mathbf{b}-A\hat{\mathbf{x}}\|\leq\|\mathbf{b}-A\mathbf{x}\|$\\
		\includegraphics[scale=0.45]{figLeastSquares.jpg}
	\end{center}
\end{ceudef}

\end{frame}

% ==============================================
\begin{frame}\frametitle{Least squares}

\begin{block}{Solution of the general least squares problem}
  Applying the Best Approximation Theorem (Theorem \ref{thm:bestApproximation}), we may project $\mathbf{b}$ onto the column space of $A$
	\begin{center}
		$\hat{\mathbf{b}}=\mathrm{Proj}_{\mathrm{Col}\{A\}}\{\mathbf{b}\}$
	\end{center}
	\begin{columns}
		\begin{column}{5cm}
			Then, we solve the equation system
			\begin{center}
				$A\mathbf{x}=\hat{\mathbf{b}}$
			\end{center}
			that has at least one solution.
		\end{column}
		\begin{column}{6cm}
			\begin{center}
				\includegraphics[scale=0.35]{figLeastSquares2.jpg}
			\end{center}
		\end{column}
	\end{columns}
\end{block}

\end{frame}

% ==============================================
\begin{frame}\frametitle{Least squares}

\begin{ceuthm}
  The set of least-squares solutions of $A\mathbf{x}=\mathbf{b}$ is the same as the set of solutions of the normal equations
	\begin{center}
		$A^TA\mathbf{x}=A^T\mathbf{b}$
	\end{center}
	\underline{\textit{Proof: least-squares solutions $\subset$ normal equations solutions}}\\
	Let us assume that $\hat{\mathbf{x}}$ is a least-squares solution. Then, by the Best Approximation Theorem (Theorem \ref{thm:bestApproximation}), $A\hat{\mathbf{x}}$ is the orthogonal projection of $\mathbf{b}$ onto $\mathrm{Col}\{A\}$,
	so $\mathbf{b}-A\hat{\mathbf{x}}$ is orthogonal to $\mathrm{Col}\{A\}$ and, in particular, to each one of the columns of $A$ ($\mathbf{a}_i$, $i=1,2,...,n$):
	\begin{center}
		$\mathbf{a}_i \cdot (\mathbf{b}-A\hat{\mathbf{x}})=0 \quad \forall i\in\{1,2,...,n\}\Rightarrow$\\
		$\mathbf{a}_i^T (\mathbf{b}-A\hat{\mathbf{x}})=0 \quad \forall i\in\{1,2,...,n\} \Rightarrow$ \\
		$A^T (\mathbf{b}-A\hat{\mathbf{x}})=\mathbf{0} \Rightarrow$ \\
		$A^T\mathbf{b}=A^TA\hat{\mathbf{x}}$ \\
	\end{center}
	That is, every least-squares solution is also a solution of the normal equations.
\end{ceuthm}

\end{frame}

% ==============================================
\begin{frame}\frametitle{Least squares}

\begin{block}{}
	\underline{\textit{Proof: least-squares solutions $\supset$ normal equations solutions}}\\
	Let us assume that $\hat{\mathbf{x}}$ is solution of the normal equations. Then,
	\begin{center}
		$A^T\mathbf{b}=A^TA\hat{\mathbf{x}} \Rightarrow$ \\
		$A^T (\mathbf{b}-A\hat{\mathbf{x}})=0 \Rightarrow$ \\
		$\mathbf{a}_i^T (\mathbf{b}-A\hat{\mathbf{x}})=0 \quad \forall i\in\{1,2,...,n\}$ \\
	\end{center}
	That is, $\mathbf{b}-A\hat{\mathbf{x}}$ is orthogonal to the columns of $A$ and, consequently, to $\mathrm{Col}\{A\}$.
	Hence, the equation
	\begin{center}
	  $\mathbf{b}=A\hat{\mathbf{x}}+(\mathbf{b}-A\hat{\mathbf{x}})$
	\end{center}
	is the orthogonal decomposition of $\mathbf{b}$ as a vector in $\mathrm{Col}\{A\}$ and a vector orthogonal to $\mathrm{Col}\{A\}$.
	By the uniqueness of the orthogonal decomposition, $A\hat{\mathbf{x}}$ must be the orthogonal projection of $\mathbf{b}$ onto $\mathrm{Col}\{A\}$ so that
	\begin{center}
		$A\hat{\mathbf{x}}=\hat{\mathbf{b}}$
	\end{center}
	and, therefore, $\hat{\mathbf{x}}$ is a least-squares solution.
\end{block}

\end{frame}

% ==============================================
\begin{frame}\frametitle{Least squares}

\begin{exampleblock}{Example}
	Find a least-squares solution to $A\mathbf{x}=\mathbf{b}$ with $A=\begin{pmatrix}4 & 0 \\ 0 & 2 \\ 1 & 1\end{pmatrix}$ and $\mathbf{b}=\begin{pmatrix}2\\0\\11\end{pmatrix}$.\\
	\underline{\textit{Solution}}\\
	Let's solve the normal equations $A^TA\hat{\mathbf{x}}=A^T\mathbf{b}$
	\begin{center}
		\begin{tabular}{cc}
			$A^TA=\begin{pmatrix}17 & 1 \\ 1 & 5\end{pmatrix}$ &
			$A^T\mathbf{b}=\begin{pmatrix}19\\11\end{pmatrix}$
		\end{tabular}\\
		$\begin{pmatrix}17 & 1 \\ 1 & 5\end{pmatrix}\hat{\mathbf{x}}=\begin{pmatrix}19\\11\end{pmatrix}\Rightarrow
		 \hat{\mathbf{x}}=\begin{pmatrix}17 & 1 \\ 1 & 5\end{pmatrix}^{-1}\begin{pmatrix}19\\11\end{pmatrix}=\begin{pmatrix}1\\2\end{pmatrix}
		$
	\end{center}
	Let's check that $\hat{\mathbf{x}}$ is not a solution of the original equation system but a least-squares solution
	\begin{center}
		$A\hat{\mathbf{x}}=\begin{pmatrix}4 & 0 \\ 0 & 2 \\ 1 & 1\end{pmatrix}\begin{pmatrix}1\\2\end{pmatrix}=\begin{pmatrix}4\\4\\3\end{pmatrix}=\hat{\mathbf{b}}\neq
		\mathbf{b} = \begin{pmatrix}2\\0\\11\end{pmatrix}$
	\end{center}
	
\end{exampleblock}

\end{frame}
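
% ==============================================
\begin{frame}[fragile]\frametitle{Least squares}
The normal equations of the example can be solved numerically (a minimal NumPy sketch; \texttt{np.linalg.lstsq} is shown only as a cross-check):
\begin{verbatim}
import numpy as np

A = np.array([[4.0, 0],
              [0.0, 2],
              [1.0, 1]])
b = np.array([2.0, 0, 11])

# Solve the normal equations A^T A x = A^T b
x_hat = np.linalg.solve(A.T @ A, A.T @ b)
print(x_hat)       # [1. 2.]
print(A @ x_hat)   # [4. 4. 3.] = b_hat, not b

# Library cross-check
print(np.linalg.lstsq(A, b, rcond=None)[0])   # [1. 2.]
\end{verbatim}
\end{frame}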

% ==============================================
\begin{frame}\frametitle{Least squares}

\begin{ceudef}[Least-squares error]
	The least-squares error is defined as
	\begin{center}
		$\sigma^2_\epsilon\triangleq \|A\hat{\mathbf{x}}-\mathbf{b}\|^2= \|\hat{\mathbf{b}}-\mathbf{b}\|^2$
	\end{center}
\end{ceudef}

\begin{exampleblock}{Example (continued)}
  In this case: 
	\begin{center}
		$\sigma^2_\epsilon=\|(4,4,3)-(2,0,11)\|^2=\|(2,4,-8)\|^2=84$
	\end{center}
	
\end{exampleblock}

\end{frame}

% ==============================================
\begin{frame}\frametitle{Least squares}

\begin{exampleblock}{Example}
	Unfortunately, the least-squares solution may not be unique, as this example (arising in ANOVA) shows.
	Find a least-squares solution to $A\mathbf{x}=\mathbf{b}$ with $A=\begin{pmatrix}1 & 1 & 0 & 0 \\ 1 & 1 & 0 & 0 \\ 1 & 0 & 1 & 0 \\ 1 & 0 & 1 & 0 \\
	1 & 0 & 0 & 1 \\ 1 & 0 & 0 & 1 \\ \end{pmatrix}$ and $\mathbf{b}=\begin{pmatrix}-3\\-1\\0\\2\\5\\1\end{pmatrix}$.\\
	\underline{\textit{Solution}}\\
	\begin{center}
		\begin{tabular}{cc}
			$A^TA=\begin{pmatrix}6 & 2 & 2 & 2 \\ 2 & 2 & 0 & 0 \\ 2 & 0 & 2 & 0 \\ 2 & 0 & 0 & 2\end{pmatrix}$ &
			$A^T\mathbf{b}=\begin{pmatrix}4\\-4\\2\\6\end{pmatrix}$
		\end{tabular}\\
	\end{center}
\end{exampleblock}

\end{frame}

% ==============================================
\begin{frame}\frametitle{Least squares}

\begin{exampleblock}{Example (continued)}
  The augmented matrix is
	\begin{center}
			$\left(\begin{array}{rrrr|r}6 & 2 & 2 & 2 & 4 \\ 2 & 2 & 0 & 0 & -4 \\ 2 & 0 & 2 & 0 & 2 \\ 2 & 0 & 0 & 2 & 6\end{array}\right) \sim
			 \left(\begin{array}{rrrr|r}1 & 0 & 0 & 1 & 3 \\ 0 & 1 & 0 & -1 & -5 \\ 0 & 0 & 1 & -1 & -2 \\ 0 & 0 & 0 & 0 & 0\end{array}\right)$
	\end{center}
	Any point of the form
	\begin{center}
			$\hat{\mathbf{x}}=\begin{pmatrix}3\\-5\\-2\\0\end{pmatrix}+x_4\begin{pmatrix}-1\\1\\1\\1\end{pmatrix} \quad \forall x_4\in\mathbb{R}$
	\end{center}
	is a least-squares solution of the problem.
\end{exampleblock}

\end{frame}
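
% ==============================================
\begin{frame}\frametitle{Least squares}

\begin{exampleblock}{Example (MATLAB check)}
	The reduced system of the previous slide can be reproduced numerically. A minimal MATLAB sketch (variable names are arbitrary) that row-reduces the augmented matrix of the normal equations:\\
	{\color{blue}\texttt{
A=[1 1 0 0; 1 1 0 0; 1 0 1 0; 1 0 1 0; 1 0 0 1; 1 0 0 1];\\
b=[-3; -1; 0; 2; 5; 1];\\
rref([A'*A A'*b]) \% reproduces the reduced augmented matrix above
	}}
\end{exampleblock}

\end{frame}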

% ==============================================
\begin{frame}\frametitle{Least squares}

\begin{ceuthm}
  The matrix $A^TA$ is invertible iff the columns of $A$ are linearly independent. In this case, the equation system $A\mathbf{x}=\mathbf{b}$ has 
	a unique least-squares solution given by 
	\begin{center}
		$\hat{\mathbf{x}}=A^+\mathbf{b}$
	\end{center}
	where $A^+$ is the \textbf{Moore-Penrose pseudoinverse}
	\begin{center}
		$A^+=(A^TA)^{-1}A^T$
	\end{center}
	\label{thm:uniqueLSSolution}
\end{ceuthm}

\end{frame}
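
% ==============================================
\begin{frame}\frametitle{Least squares}

\begin{block}{}
	When the columns of $A$ are linearly independent, MATLAB's \texttt{pinv} computes the Moore-Penrose pseudoinverse, so the unique least-squares solution of the first example can also be obtained as in this minimal sketch:\\
	{\color{blue}\texttt{
A=[4 0; 0 2; 1 1];\\
b=[2; 0; 11];\\
xhat=pinv(A)*b \% equals inv(A'*A)*A'*b here, gives [1; 2]
	}}
\end{block}

\end{frame}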

% ==============================================
\begin{frame}\frametitle{Least squares and QR decomposition}

Sometimes $A^TA$ is ill-conditioned; this means that small perturbations in $\mathbf{b}$ translate into large perturbations in $\hat{\mathbf{x}}$. The QR
decomposition offers a numerically more stable way of finding the least-squares solution.

\begin{ceuthm}
  Let $A\in\mathcal{M}_{m\times n}$ have linearly independent columns. Consider its QR decomposition ($A=QR$). Then, for each $\mathbf{b}\in\mathbb{R}^m$ there
	is a unique least-squares solution of $A\mathbf{x}=\mathbf{b}$ given by 
	\begin{center}
		$\hat{\mathbf{x}}=R^{-1}Q^T\mathbf{b}$
	\end{center}
	\underline{\textit{Proof}}\\
	If we substitute $\hat{\mathbf{x}}=R^{-1}Q^T\mathbf{b}$ into $A\mathbf{x}$ we have
	\begin{center}
		$A\hat{\mathbf{x}}=AR^{-1}Q^T\mathbf{b}=QRR^{-1}Q^T\mathbf{b}=QQ^T\mathbf{b}$.
	\end{center}
	But $Q$ is an orthonormal basis of $\mathrm{Col}\{A\}$ (Theorem \ref{thm:qr} and Corollary in Slide \pageref{cor:U}) and consequently $QQ^T\mathbf{b}$ is the
	orthogonal projection of $\mathbf{b}$ onto $\mathrm{Col}\{A\}$, that is, $\hat{\mathbf{b}}$. So, $\hat{\mathbf{x}}=R^{-1}Q^T\mathbf{b}$ is a least-squares solution
	of $A\mathbf{x}=\mathbf{b}$. Additionally, since the columns of $A$ are linearly independent, by Theorem \ref{thm:uniqueLSSolution}, this solution is unique.
\end{ceuthm}

\end{frame}

% ==============================================
\begin{frame}\frametitle{Least squares and QR decomposition}

Recall that, numerically, it is preferable to solve $R\hat{\mathbf{x}}=Q^T\mathbf{b}$ by back substitution than to explicitly compute $\hat{\mathbf{x}}=R^{-1}Q^T\mathbf{b}$
\begin{exampleblock}{Example}
	Let $A=\begin{pmatrix}1 & 3 & 5 \\ 1 & 1 & 0 \\ 1 & 1 & 2 \\ 1 & 3 & 3 \end{pmatrix}$ and $\mathbf{b}=\begin{pmatrix}3\\5\\7\\-3\end{pmatrix}$. Its QR decomposition is
	\begin{center}
		$A=QR=\left(\begin{array}{rrr}\frac{1}{2} & \frac{1}{2} & \frac{1}{2} \\ \frac{1}{2} & -\frac{1}{2} & -\frac{1}{2} \\ \frac{1}{2} & -\frac{1}{2} & \frac{1}{2} \\
		   \frac{1}{2} & \frac{1}{2} & -\frac{1}{2} \end{array}\right)\begin{pmatrix} 2 & 4 & 5 \\ 0 & 2 & 3 \\ 0 & 0 & 2 \end{pmatrix}$\\
		$Q^T\mathbf{b}=\left(\begin{array}{r}6\\-6\\4\end{array}\right) \Rightarrow \begin{pmatrix} 2 & 4 & 5 \\ 0 & 2 & 3 \\ 0 & 0 & 2 \end{pmatrix}
		\hat{\mathbf{x}}=\left(\begin{array}{r}6\\-6\\4\end{array}\right)\Rightarrow \hat{\mathbf{x}}=\left(\begin{array}{r}10\\-6\\2\end{array}\right)$
	\end{center}
\end{exampleblock}

\end{frame}
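
% ==============================================
\begin{frame}\frametitle{Least squares and QR decomposition}

\begin{exampleblock}{Example (MATLAB check)}
	A minimal MATLAB sketch of this example using the economy-size QR factorization. The computed $Q$ and $R$ may differ from the ones above in the signs of some columns and rows, but the least-squares solution is the same:\\
	{\color{blue}\texttt{
A=[1 3 5; 1 1 0; 1 1 2; 1 3 3];\\
b=[3; 5; 7; -3];\\
[Q,R]=qr(A,0); \% economy-size QR decomposition\\
xhat=R\textbackslash(Q'*b) \% back substitution, gives [10; -6; 2]
	}}
\end{exampleblock}

\end{frame}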

% ==============================================
\begin{frame}\frametitle{Exercises} 

\begin{exerciseblock}{Exercises}
	From Lay (3rd ed.), Chapter 6, Section 5:
	\begin{itemize}
		\item 6.5.1
		\item 6.5.19
		\item 6.5.20
		\item 6.5.21
		\item 6.5.24
	\end{itemize}
\end{exerciseblock}

\end{frame}

% ==============================================
\subsection{Least-squares linear regression (c)} 
\Outline

\begin{frame}\frametitle{Least-squares linear regression} 
\begin{exampleblock}{Example}
	In many scientific and engineering problems we need to explain some observations $\mathbf{y}$ as a linear function of an independent variable $\mathbf{x}$.
	For instance, we may try to explain the weight of a person as a linear function of his or her height
	\begin{columns}
		\begin{column}{5cm}
			$Weight=\beta_0+\beta_1 Height$\\
		\end{column}
		\begin{column}{5cm}
			\begin{center}
				\includegraphics[scale=0.35]{figRegression.jpg}
			\end{center}
		\end{column}
	\end{columns}
	\begin{tiny}
		A. Schneider, G. Hommel, M. Blettner. \textit{Linear Regression Analysis}. Dtsch Arztebl Int. \textbf{2010} November; 107(44): 776–782.
	\end{tiny}
\end{exampleblock}

\end{frame}

% ==============================================
\begin{frame}\frametitle{Least-squares linear regression} 
\begin{exampleblock}{Example (continued)}
	For each observation we have an equation
	\begin{columns}
		\begin{column}{5cm}
			\begin{center}
				\begin{tabular}{c|c}
					\textbf{Height} (m.) & \textbf{Weight} (kg.) \\
					\hline
					1.70 & 57 \\
					1.53 & 43 \\
					1.90 & 94 \\
					... & ...
				\end{tabular}
			\end{center}
		\end{column}
		\begin{column}{5cm}
			\begin{center}
				$\begin{array}{l}
					57=\beta_0+1.70\beta_1 \\
					43=\beta_0+1.53\beta_1 \\
					94=\beta_0+1.90\beta_1 \\
					...
				\end{array}
				$
			\end{center}
		\end{column}
	\end{columns}
	\begin{center}
			$\begin{pmatrix} 1 & 1.70 \\ 1 & 1.53 \\ 1 & 1.90 \\ ... & ... \end{pmatrix} \begin{pmatrix}\beta_0\\\beta_1\end{pmatrix}=\begin{pmatrix}57\\43\\94\\...\end{pmatrix}$
	\end{center}
	which is of the form
	\begin{center}
			$X\mathbf{\beta}=\mathbf{y}$
	\end{center}
\end{exampleblock}

\end{frame}

% ==============================================
\begin{frame}\frametitle{Least-squares linear regression} 
\begin{block}{Least-squares regression}
	Each of the observed \textbf{data points} $(x_j,y_j)$ gives one equation. Together they form an equation system
	\begin{center}
			$X\mathbf{\beta}=\mathbf{y}$
	\end{center}
	that is an overdetermined, linear equation system of the form $A\mathbf{x}=\mathbf{b}$. The matrix $X$ is called the \textbf{system matrix} and it is related to the \textbf{independent} (\textbf{predictor})
	variables (the height in this case). The vector $\mathbf{y}$ is called the \textbf{observation vector} and collects the values of the \textbf{dependent} (\textbf{predicted}) variable (the weight in this
	case). The model
	\begin{center}
			$y=\beta_0+\beta_1 x+\epsilon$
	\end{center}
	is called the \textbf{linear regression of} $y$ \textbf{on} $x$. $\beta_0$ and $\beta_1$ are called the \textbf{regression coefficients}. The difference between the predicted value and the observed value
	for a particular observation ($\epsilon$) is called the \textbf{residual} of that observation.
\end{block}

\end{frame}

% ==============================================
\begin{frame}\frametitle{Least-squares linear regression} 
\begin{block}{}
	\begin{center}
		\includegraphics[scale=0.5]{figRegression2.jpg}
	\end{center}
	The residual of the $j$-th observation is defined as
	\begin{center}
		$\epsilon_j=y_j-(\beta_0+\beta_1 x_j)$
	\end{center}
\end{block}

\end{frame}

% ==============================================
\begin{frame}\frametitle{Least-squares linear regression} 
\begin{block}{}
	The goal of least-squares regression is to minimize
	\begin{center}
		$\sum\limits_{j=1}^n\epsilon_j^2=\|\mathbf{y}-X\mathbf{\beta}\|^2$
	\end{center}
	Let's analyze this term
	\begin{center}
		$X\mathbf{\beta}=\begin{pmatrix}1 & x_1 \\ 1 & x_2 \\ ... & ... \\ 1 & x_n\end{pmatrix}\begin{pmatrix}\beta_0 \\ \beta_1\end{pmatrix}=
		   \begin{pmatrix}\beta_0+\beta_1x_1 \\ \beta_0+\beta_1x_2 \\ ... \\ \beta_0+\beta_1x_n\end{pmatrix}=
			 \begin{pmatrix}\hat{y}_1 \\ \hat{y}_2 \\ ... \\ \hat{y}_n\end{pmatrix}$
	\end{center}
	Then
	\begin{center}
		$\|\mathbf{y}-X\mathbf{\beta}\|^2=\left\|\begin{pmatrix}y_1-\hat{y}_1 \\ y_2-\hat{y}_2 \\ ... \\ y_n-\hat{y}_n\end{pmatrix}\right\|^2
		=\sum\limits_{j=1}^n(y_j-\hat{y}_j)^2=\sum\limits_{j=1}^n\epsilon_j^2$
	\end{center}
	
\end{block}

\end{frame}

% ==============================================
\begin{frame}\frametitle{Least-squares linear regression} 
\begin{exampleblock}{Example}
	Suppose we have observed the following values of height and weight (1.70,57), (1.53,43), (1.90,94). We construct the system matrix
	$X=\begin{pmatrix} 1 & 1.70 \\ 1 & 1.53 \\ 1 & 1.90 \end{pmatrix}$ and the observation vector $\mathbf{y}=\begin{pmatrix}57\\43\\94\end{pmatrix}$.
	Now we write and solve the normal equations
	\begin{center}
		$X\mathbf{\beta}=\mathbf{y} \Rightarrow X^TX\mathbf{\beta}=X^T\mathbf{y}$ \\
		\begin{tabular}{cc}
			$X^TX=\begin{pmatrix}3.00 & 5.13 \\ 5.13 & 8.84\end{pmatrix}$ &
			$X^T\mathbf{y}=\begin{pmatrix}194.00 \\ 341.29\end{pmatrix}$
		\end{tabular}
		$\hat{\mathbf{\beta}}=(X^TX)^{-1}X^T\mathbf{y}=\begin{pmatrix}-173.39 \\ 139.21\end{pmatrix}$\\
		$Weight=-173.39+139.21 Height$
	\end{center}
	
\end{exampleblock}

\end{frame}

% ==============================================
\begin{frame}\frametitle{Least-squares linear regression} 
\begin{exampleblock}{Example}
	\begin{columns}
		\begin{column}{6.5cm}
			\includegraphics[scale=0.41]{figRegression3.eps}
		\end{column}
		\begin{column}{6cm}
	MATLAB:\\
	{\color{blue}\texttt{
X=[1 1.70; 1 1.53; 1 1.90];\\
y=[57; 43; 94];\\
beta=inv(X'*X)*X'*y\\
x=1.5:0.01:2.00;\\
yp=beta(1)+beta(2)*x;\\
plot(x,yp,X(:,2),y,'o')\\
xlabel('Height (m)')\\
ylabel('Weight (kg)')
		}}
		\end{column}
	\end{columns}
\end{exampleblock}

\end{frame}
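
% ==============================================
\begin{frame}\frametitle{Least-squares linear regression}

\begin{block}{}
	In practice we rarely invert $X^TX$ explicitly: the MATLAB backslash operator returns a least-squares solution of an overdetermined system (internally it typically relies on a QR factorization). A minimal variant of the previous script:\\
	{\color{blue}\texttt{
X=[1 1.70; 1 1.53; 1 1.90];\\
y=[57; 43; 94];\\
beta=X\textbackslash y \% same solution as inv(X'*X)*X'*y
	}}
\end{block}

\end{frame}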

% ==============================================
\begin{frame}\frametitle{Least-squares linear regression} 
\begin{block}{The general linear model}
	The linear model is not restricted to straight lines. We can use it to fit any kind of curves:
	\begin{center}
		$y=\beta_0f_0(x)+\beta_1f_1(x)+\beta_2f_2(x)+...$
	\end{center}
\end{block}

\begin{exampleblock}{Fitting a parabola}
	\begin{tabular}{ccc}
		$\begin{array}{c} f_0(x)=1 \\ f_1(x)=x \\ f_2(x)=x^2\end{array} \Rightarrow $ &
		$\begin{array}{l} y_1=\beta_0f_0(x_1)+\beta_1f_1(x_1)+\beta_2f_2(x_1) \\ y_2=\beta_0f_0(x_2)+\beta_1f_1(x_2)+\beta_2f_2(x_2) \\ ... \\
		   y_n=\beta_0f_0(x_n)+\beta_1f_1(x_n)+\beta_2f_2(x_n) \end{array}$ & \\
		& $\begin{pmatrix}y_1\\y_2\\...\\y_n\end{pmatrix}=\begin{pmatrix} 1 & x_1 & x_1^2 \\ 1 & x_2 & x_2^2 \\ ... & ... & ... \\ 1 & x_n & x_n^2 \end{pmatrix}
		    \begin{pmatrix}\beta_0 \\ \beta_1 \\ \beta_2 \end{pmatrix}+\begin{pmatrix}\epsilon_1\\\epsilon_2\\...\\\epsilon_n\end{pmatrix}$ & 
			$\Rightarrow \mathbf{y}=X\mathbf{\beta}+\mathbf{\epsilon}$
	\end{tabular}
\end{exampleblock}

\end{frame}
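
% ==============================================
\begin{frame}\frametitle{Least-squares linear regression}

\begin{exampleblock}{Fitting a parabola (MATLAB sketch)}
	A minimal MATLAB sketch of the parabola fit; the data values are made up for illustration:\\
	{\color{blue}\texttt{
x=[0; 1; 2; 3; 4]; \% hypothetical abscissas\\
y=[1.1; 0.2; 1.3; 4.1; 8.9]; \% hypothetical observations\\
X=[ones(size(x)) x x.*x]; \% columns: f0=1, f1=x, f2=x*x\\
beta=X\textbackslash y \% estimated regression coefficients
	}}
\end{exampleblock}

\end{frame}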

% ==============================================
\begin{frame}\frametitle{Least-squares linear regression} 
\begin{exampleblock}{Fitting a parabola}
   In this example, the authors model the deformation of the wall of the zebrafish embryo as a function of strain.
	\begin{center}
		\includegraphics[height=4cm]{figRegressionParabola.jpg}
		\includegraphics[height=4cm]{figRegressionParabola2.jpg}
	\end{center}

\begin{tiny}
    Z. Lua, P. C.Y. Chen, H. Luo, J. Nam, R. Ge, W. Lin. 	\textit{Models of maximum stress and strain of zebrafish embryos under indentation}. J. Biomechanics 42 (5): 620–625 (\textbf{2009})
\end{tiny}
\end{exampleblock}

\end{frame}

% ==============================================
\begin{frame}\frametitle{Least-squares linear regression} 
\begin{block}{Multivariate linear regression}
	The linear model is not restricted to one variable. By fitting several variables we may fit surfaces and hypersurfaces
	\begin{center}
		$y=\beta_0f_0(x_1,x_2)+\beta_1f_1(x_1,x_2)+\beta_2f_2(x_1,x_2)+...$
	\end{center}
\end{block}

\begin{exampleblock}{Fitting a parabolic surface}
	\begin{center}
		$\begin{array}{l} f_0(x_1,x_2)=1 \\ f_1(x_1,x_2)=x_1 \\ f_2(x_1,x_2)=x_2 \\ f_3(x_1,x_2)=x_1^2 \\ f_4(x_1,x_2)=x_2^2 \\ f_5(x_1,x_2)=x_1x_2 \end{array} \Rightarrow 
		 X=\begin{pmatrix} 1 & x_{11} & x_{12} & x_{11}^2 & x_{12}^2 & x_{11}x_{12} \\ 1 & x_{21} & x_{22} & x_{21}^2 & x_{22}^2 & x_{21}x_{22} \\
		     ... & ... & ... & ... & ... & ... \\ 1 & x_{n1} & x_{n2} & x_{n1}^2 & x_{n2}^2 & x_{n1}x_{n2} \end{pmatrix}$
	\end{center}
\end{exampleblock}

\end{frame}
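
% ==============================================
\begin{frame}\frametitle{Least-squares linear regression}

\begin{exampleblock}{Fitting a parabolic surface (MATLAB sketch)}
	A minimal MATLAB sketch of the parabolic-surface design matrix, assuming the data come on a small $3\times 3$ grid (all values are hypothetical):\\
	{\color{blue}\texttt{
x1=[0;1;2;0;1;2;0;1;2]; x2=[0;0;0;1;1;1;2;2;2]; \% hypothetical grid\\
y=[1;2;5;2;3;6;4;5;9]; \% hypothetical observations\\
X=[ones(size(x1)) x1 x2 x1.*x1 x2.*x2 x1.*x2];\\
beta=X\textbackslash y \% the six regression coefficients
	}}
\end{exampleblock}

\end{frame}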

% ==============================================
\begin{frame}\frametitle{Least-squares linear regression} 
\begin{exampleblock}{Fitting a parabolic surface}
   In this example, the authors model the shape of the cornea using videokeratoscopic images.
	\begin{center}
		\includegraphics[height=3cm]{figCornea1.png}
		\includegraphics[height=3cm]{figCornea2.png}\\
		\includegraphics[height=3.2cm]{figCornea3.jpg}\\		
	\end{center}

\begin{tiny}
    \href{http://www.fhp.tu-darmstadt.de/nt/index.php?id=531&L=1}{Signal Processing Group, Technische Universitat Darmstadt}
\end{tiny}
\end{exampleblock}

\end{frame}

% ==============================================
\begin{frame}\frametitle{Exercises} 

\begin{exerciseblock}{Exercises}
	From Lay (3rd ed.), Chapter 6, Section 6:
	\begin{itemize}
		\item 6.6.1
		\item 6.6.5
		\item 6.6.9
		\item 6.6.12 (computer)
	\end{itemize}
\end{exerciseblock}

\end{frame}

% ==============================================
\subsection{Inner product spaces (d)} 
\Outline

\begin{frame}\frametitle{Inner product spaces} 
	\begin{center}
		\includegraphics[height=7cm]{figSpaces.jpg}
	\end{center}
\end{frame}

% ==============================================
\begin{frame}\frametitle{Inner product spaces} 
\begin{ceudef}[Inner product]
	An inner product in a vector space $V$ is a function that assigns a real number $\left<\mathbf{u},\mathbf{v}\right>$ to every pair of vectors $\mathbf{u}$ and $\mathbf{v}$, and that 
	satisfies the following axioms for all $\mathbf{u}, \mathbf{v}, \mathbf{w} \in V$ and all scalars $c$:
	\begin{enumerate}
		\item $\left<\mathbf{u},\mathbf{v}\right>=\left<\mathbf{v},\mathbf{u}\right>$
		\item $\left<\mathbf{u}+\mathbf{v},\mathbf{w}\right>=\left<\mathbf{u},\mathbf{w}\right>+\left<\mathbf{v},\mathbf{w}\right>$
		\item $\left<c\mathbf{u},\mathbf{v}\right>=c\left<\mathbf{u},\mathbf{v}\right>$
		\item $\left<\mathbf{u},\mathbf{u}\right> \geq 0 $ and $\left<\mathbf{u},\mathbf{u}\right>=0$ iff $\mathbf{u}=\mathbf{0}$.
	\end{enumerate}
\end{ceudef}

\begin{exampleblock}{Example}
	For instance in \textbf{Weighted Least Squares} (WLS) we may use an inner product in $\mathbb{R}^2$ defined as:
	\begin{center}
		$\left<\mathbf{u},\mathbf{v}\right>=4u_1v_1+5u_2v_2$
	\end{center}
	In this way we give less weight to distances in the first component with respect to distances in the second component.
	
\end{exampleblock}
\end{frame}

% ==============================================
\begin{frame}\frametitle{Inner product spaces} 
\begin{exampleblock}{}
	Now we have to prove that this function is effectively an inner product:
	\begin{enumerate}
		\item $\left<\mathbf{u},\mathbf{v}\right>=\left<\mathbf{v},\mathbf{u}\right>$
		  \begin{center}
				$\begin{array}{rcll}\left<\mathbf{u},\mathbf{v}\right>&=&4u_1v_1+5u_2v_2&\quad\text{[by definition]}\\
				   &=&4v_1u_1+5v_2u_2&\quad\text{[commutativity of scalar multiplication]}\\
				   &=&\left<\mathbf{v},\mathbf{u}\right>&\quad\text{[by definition]}
				\end{array}$
			\end{center}
		\item $\left<\mathbf{u}+\mathbf{v},\mathbf{w}\right>=\left<\mathbf{u},\mathbf{w}\right>+\left<\mathbf{v},\mathbf{w}\right>$
		  \begin{center}
				$\begin{array}{rcll}\left<\mathbf{u}+\mathbf{v},\mathbf{w}\right>&=&4(u_1+v_1)w_1+5(u_2+v_2)w_2&\quad\text{[by definition]}\\
				   &=&4u_1w_1+4v_1w_1+5u_2w_2+5v_2w_2&\quad\text{[distributivity of scalar]}\\
				   & & &\quad\text{[multiplication/addition]}\\
				   &=&4u_1w_1+5u_2w_2+4v_1w_1+5v_2w_2&\quad\text{[commutativity]}\\
				   & & &\quad\text{[of scalar addition]}\\
				   &=&\left<\mathbf{u},\mathbf{w}\right>+\left<\mathbf{v},\mathbf{w}\right>&\quad\text{[by definition]}
				\end{array}$
			\end{center}
	\end{enumerate}
	
\end{exampleblock}
\end{frame}

% ==============================================
\begin{frame}\frametitle{Inner product spaces} 
\begin{exampleblock}{}
	\begin{enumerate}
	  \setcounter{enumi}{2}
		\item $\left<c\mathbf{u},\mathbf{v}\right>=c\left<\mathbf{u},\mathbf{v}\right>$
		  \begin{center}
				$\begin{array}{rcll}\left<c\mathbf{u},\mathbf{v}\right>&=&4cu_1v_1+5cu_2v_2&\quad\text{[by definition]}\\
				   &=&c4v_1u_1+c5v_2u_2&\quad\text{[commutativity of scalar multiplication]}\\
				   &=&c(4v_1u_1+5v_2u_2)&\quad\text{[distributivity of scalar multiplication]}\\
				   &=&c\left<\mathbf{u},\mathbf{v}\right>&\quad\text{[by definition]}
				\end{array}$
			\end{center}
		\item $\left<\mathbf{u},\mathbf{u}\right> \geq 0 $ and $\left<\mathbf{u},\mathbf{u}\right>=0$ iff $\mathbf{u}=\mathbf{0}$.
			\begin{enumerate}
				\item $\left<\mathbf{u},\mathbf{u}\right> \geq 0 $
					\begin{center}
						$\begin{array}{rcll}\left<\mathbf{u},\mathbf{u}\right>&=&4u_1^2+5u_2^2&\quad\text{[by definition]}\\
						\end{array}$
					\end{center}
					which is clearly greater than or equal to 0.
				\item $\left<\mathbf{u},\mathbf{u}\right>=0$ iff $\mathbf{u}=\mathbf{0}$.
					\begin{center}
						$\left<\mathbf{u},\mathbf{u}\right>=0 \Leftrightarrow 4u_1^2+5u_2^2=0 \Leftrightarrow u_1=u_2=0$
					\end{center}
				\end{enumerate}
	\end{enumerate}
	
\end{exampleblock}
\end{frame}

% ==============================================
\begin{frame}\frametitle{Inner product spaces} 
\begin{exampleblock}{Example}
	Consider two vectors $p$ and $q$ in the vector space of polynomials of degree at most $n$ ($\mathbb{P}_n$). Let $t_0,t_1,...,t_n$ be $n+1$ distinct real numbers and $K>0$ a positive scalar. The
	inner product between $p$ and $q$ is defined as
	\begin{center}
		$\left<p,q\right>=K\left(p(t_0)q(t_0)+p(t_1)q(t_1)+...+p(t_n)q(t_n)\right)$
	\end{center}
	Axioms 1-3 are easy to check. Let's prove Axiom 4
	\begin{enumerate}
	  \setcounter{enumi}{3}
		\item $\left<p,p\right> \geq 0 $ and $\left<p,p\right>=0$ iff $p=0$.
			\begin{enumerate}
				\item $\left<p,p\right> \geq 0 $
					\begin{center}
						$\begin{array}{rcll}\left<p,p\right>&=&K\left(p^2(t_0)+p^2(t_1)+...+p^2(t_n)\right)&\quad\text{[by definition]}\\
						\end{array}$
					\end{center}
					which is clearly greater than or equal to 0 since $K>0$.
				\item $\left<p,p\right>=0$ iff $p=0$.
					\begin{center}
						$\left<p,p\right>=0 \Leftrightarrow K\left(p^2(t_0)+p^2(t_1)+...+p^2(t_n)\right)=0 \Leftrightarrow $\\
						$p(t_0)=p(t_1)=...=p(t_n)=0$
					\end{center}
					But a nonzero polynomial of degree at most $n$ can have at most $n$ zeros, whereas the previous condition requires the polynomial
					to vanish at $n+1$ distinct points. This is impossible unless $p=0$.
				\end{enumerate}
	\end{enumerate}	
\end{exampleblock}
\end{frame}

% ==============================================
\begin{frame}\frametitle{Inner product spaces} 
\begin{exampleblock}{Example}
	Consider two vectors $p$ and $q$ in the vector space of polynomials of degree at most $n$ ($\mathbb{P}_n$). Assume that we regularly space the $n+1$ points in the interval $[-1,1]$
	\begin{center}
		\includegraphics[scale=0.35]{figInnerProduct.jpg}
	\end{center}
	and set $K=\Delta T$, then the inner product between the two polynomials becomes
	\begin{center}
		$\left<p,q\right>=\left(p(t_0)q(t_0)+p(t_1)q(t_1)+...+p(t_n)q(t_n)\right)\Delta T=\sum\limits_{i=0}^n{p(t_i)q(t_i)\Delta T}$
	\end{center}
	Letting $\Delta T$ tend to 0 (and, correspondingly, the number of points grow), the inner product becomes
	\begin{center}
		$\left<p,q\right>=\int_{-1}^1{p(t)q(t)dt}$
	\end{center}
\end{exampleblock}
\end{frame}

% ==============================================
\begin{frame}\frametitle{Inner product spaces} 
\begin{exampleblock}{}
	Legendre polynomials are orthogonal polynomials in the interval $[-1,1]$
	\begin{center}
		\includegraphics[height=4cm]{figLegendre.png}
		\includegraphics[height=4cm]{figLegendre2.png}
	\end{center}
	Legendre polynomials are very useful for the regression of high-order polynomials, as shown in the next slide.
\end{exampleblock}
\end{frame}

% ==============================================
\begin{frame}\frametitle{Inner product spaces} 
\begin{exampleblock}{}
	\begin{center}
		\includegraphics[width=11cm]{figLegendre3.jpg}
	\end{center}
	\label{legendre}
\end{exampleblock}
\end{frame}

% ==============================================
\begin{frame}\frametitle{Length, distance and orthogonality} 
\begin{block}{Length, distance and orthogonality}
  The \textbf{length} of a vector $\mathbf{u}$ in an inner product space is defined in the standard way
	\begin{center}
		$\|\mathbf{u}\|=\sqrt{\left<\mathbf{u},\mathbf{u}\right>}$
	\end{center}
	Similarly, the \textbf{distance between two vectors} $\mathbf{u}$ and $\mathbf{v}$ is defined as
	\begin{center}
		$d(\mathbf{u},\mathbf{v})=\|\mathbf{u}-\mathbf{v}\|$
	\end{center}
	Finally, two vectors $\mathbf{u}$ and $\mathbf{v}$ are said to be \textbf{orthogonal} iff
	\begin{center}
		$\left<\mathbf{u},\mathbf{v}\right>=0$
	\end{center}
	
\end{block}
\end{frame}

% ==============================================
\begin{frame}\frametitle{Length, distance and orthogonality} 
\begin{exampleblock}{Example}
  In the vector space of polynomials in the interval $[0,1]$, $\mathbb{P}[0,1]$, let's define the inner product
	\begin{center}
		$\left<p,q\right>=\int_0^1{p(t)q(t)dt}$
	\end{center}
	What is the length of the vector $p(t)=3t^2$?\\
	\underline{\textit{Solution}}\\
	\begin{center}
		$\begin{array}{rcl}\|p\|&=&\sqrt{\left<p,p\right>}=\sqrt{\int_0^1{p^2(t)dt}}=\sqrt{\int_0^1{(3t^2)^2dt}}=\sqrt{\int_0^1{9t^4dt}}\\
		  &=&\sqrt{\left.9\frac{t^5}{5}\right|_0^1}=\sqrt{9\left(\frac{1}{5}-0\right)}=\frac{3}{\sqrt{5}}
		\end{array}$
	\end{center}
\end{exampleblock}
\end{frame}
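
% ==============================================
\begin{frame}\frametitle{Length, distance and orthogonality}

\begin{exampleblock}{Example (MATLAB check)}
	The previous integral can be checked numerically; a minimal MATLAB sketch (\texttt{integral} is available in recent releases):\\
	{\color{blue}\texttt{
p=@(t) 3*t.*t; \% p(t)=3*t*t\\
normp=sqrt(integral(@(t) p(t).*p(t),0,1)) \% approx 3/sqrt(5)=1.3416
	}}
\end{exampleblock}

\end{frame}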

% ==============================================
\begin{frame}\frametitle{Gram-Schmidt orthogonalization} 
\begin{exampleblock}{Example}
  Gram-Schmidt is applied in the standard way. For instance, find an orthogonal basis of $\mathbb{P}_2[-1,1]$. A basis that spans this space is
	\begin{center}
		$\{1,t,t^2\}$
	\end{center}
	Let's orthogonalize it
	\begin{center}
		$\begin{array}{rcl}
		   p_0(t)&=&1\\
			 p_1(t)&=&t-\frac{\left<t,p_0(t)\right>}{\|p_0\|^2}p_0(t)
			        =t-\frac{\int_{-1}^1{t dt}}{\int_{-1}^1{dt}} 1=t-\frac{0}{2} 1=t\\
			 p_2(t)&=&t^2-\frac{\left<t^2,p_0(t)\right>}{\|p_0\|^2}p_0(t)-\frac{\left<t^2,p_1(t)\right>}{\|p_1\|^2}p_1(t)\\
			       &=&t^2-\frac{\int_{-1}^1{t^2 dt}}{\int_{-1}^1{dt}}-\frac{\int_{-1}^1{t^2\,t\,dt}}{\int_{-1}^1{t^2 dt}}t=t^2-\frac{\frac{2}{3}}{2}-\frac{0}{\frac{2}{3}}t=
						    t^2-\frac{1}{3}\\
		 \end{array}$
	\end{center}
	In Slide \pageref{legendre} we presented the Legendre polynomial of degree 2, $P_2(t)=\frac{1}{2}(3t^2-1)$; it is easy to check that $P_2(t)=\frac{3}{2}p_2(t)$. Consequently,
	since $p_2(t)$ is orthogonal to $p_0(t)$ and $p_1(t)$, so is $P_2(t)$.
\end{exampleblock}
\end{frame}
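
% ==============================================
\begin{frame}\frametitle{Gram-Schmidt orthogonalization}

\begin{exampleblock}{Example (MATLAB check)}
	The orthogonality of the polynomials obtained above can be verified numerically with the inner product on $[-1,1]$; a minimal MATLAB sketch:\\
	{\color{blue}\texttt{
p0=@(t) ones(size(t)); p1=@(t) t; p2=@(t) t.*t-1/3;\\
integral(@(t) p2(t).*p0(t),-1,1) \% approx 0\\
integral(@(t) p2(t).*p1(t),-1,1) \% approx 0
	}}
\end{exampleblock}

\end{frame}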

% ==============================================
\begin{frame}\frametitle{Best approximation} 
\begin{exampleblock}{Example}
  What is the best approximation in $\mathbb{P}_2[-1,1]$ of $p(t)=t^3$?\\
	\underline{\textit{Solution}}\\
	We know the answer is the orthogonal projection of $p(t)$ onto $\mathbb{P}_2[-1,1]$. An orthogonal basis of $\mathbb{P}_2[-1,1]$ is $\{1,t,t^2-\frac{1}{3}\}$. Therefore,
	this projection can be calculated as
	\begin{center}
		$\hat{p}(t)=\mathrm{Proj}_{\mathbb{P}_2[-1,1]}\{p(t)\}=\frac{\left<p,p_0\right>}{\|p_0\|^2}p_0(t)+\frac{\left<p,p_1\right>}{\|p_1\|^2}p_1(t)+
		   \frac{\left<p,p_2\right>}{\|p_2\|^2}p_2(t)$
	\end{center}
	Let's perform these calculations:
	\begin{center}
		\begin{tabular}{l|l}
			$\left<p,p_0(t)\right>=\int_{-1}^1{t^3dt}=0$ & $\|p_0\|^2=\int_{-1}^1{dt}=2$ \\
			$\left<p,p_1(t)\right>=\int_{-1}^1{t^3tdt}=\frac{2}{5}$ & $\|p_1\|^2=\int_{-1}^1{t^2dt}=\frac{2}{3}$ \\
			$\left<p,p_2(t)\right>=\int_{-1}^1{t^3(t^2-\frac{1}{3})dt}=0$ & $\|p_2\|^2=\int_{-1}^1{(t^2-\frac{1}{3})^2dt}=\frac{8}{45}$ \\
		\end{tabular}
		$\hat{p}(t)=\frac{0}{2}+\frac{\frac{2}{5}}{\frac{2}{3}}t+\frac{0}{\frac{8}{45}}(t^2-\frac{1}{3})=\frac{3}{5}t$
	\end{center}
\end{exampleblock}
\end{frame}
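
% ==============================================
\begin{frame}\frametitle{Best approximation}

\begin{exampleblock}{Example (MATLAB check)}
	The projection coefficients of this example can also be evaluated numerically; a minimal MATLAB sketch for the only nonzero coefficient:\\
	{\color{blue}\texttt{
p=@(t) t.*t.*t; p1=@(t) t; \% p(t)=t*t*t and p1(t)=t\\
c1=integral(@(t) p(t).*p1(t),-1,1)/integral(@(t) p1(t).*p1(t),-1,1)\\
\% gives 0.6, that is, 3/5
	}}
\end{exampleblock}

\end{frame}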

% ==============================================
\begin{frame}\frametitle{Best approximation} 
\begin{exampleblock}{}
	\begin{center}
		\includegraphics[scale=0.5]{figBestApproximation.eps}
		% t=-1:0.001:1;  plot(t,t.^3,t,0.6*t,'LineWidth',2); legend('t^3','3/5t')
	\end{center}
\end{exampleblock}
\end{frame}

% ==============================================
\begin{frame}\frametitle{Best approximation} 
\begin{exampleblock}{Example}
  In this work we exploited the best-approximation property of orthogonal wavelets to speed up, and make more robust, the angular alignment of projections in 3D Electron Microscopy.
	\begin{center}
		\includegraphics[scale=0.25]{figOrthogonalityEM.jpg}
	\end{center}
	\begin{tiny}
		C.O.S.Sorzano, S. Jonic, C. El-Bez, J.M. Carazo, S. De Carlo, P. Thévenaz, M. Unser. \textit{A multiresolution approach to orientation assignment in 3-D electron microscopy of single particles}. Journal of Structural Biology 146(3): 381-392 (\textbf{2004}, cover article)
	\end{tiny}
\end{exampleblock}
\end{frame}



% ==============================================
\begin{frame}\frametitle{Pythagorean theorem} 
\begin{ceuthm}[Pythagorean theorem]
  Given any vector $\mathbf{v}$ in an inner product space $V$ and a subspace of it $W\subseteq V$ we have
	\begin{center}
	  $\|\mathbf{v}\|^2=\|\mathrm{Proj}_W\{\mathbf{v}\}\|^2+\|\mathbf{v}-\mathrm{Proj}_W\{\mathbf{v}\}\|^2$\\
		\includegraphics[scale=0.5]{figPythagorean.jpg}
	\end{center}
	\label{thm:pythagorean2}
\end{ceuthm}
\end{frame}

% ==============================================
\begin{frame}\frametitle{The Cauchy-Schwarz inequality} 
\begin{ceuthm}[The Cauchy-Schwarz inequality]
  For all $\mathbf{u},\mathbf{v}\in V$ it is verified
	\begin{center}
	  $|\left<\mathbf{u},\mathbf{v}\right>|\leq\|\mathbf{u}\|\|\mathbf{v}\|$
	\end{center}
	\underline{\textit{Proof}}\\
	If $\mathbf{u}=\mathbf{0}$, then 
	\begin{center}
		$|\left<\mathbf{0},\mathbf{v}\right>|=0\quad$ and $\quad\|\mathbf{0}\|\|\mathbf{v}\|=0\|\mathbf{v}\|=0$
	\end{center}
	So the inequality becomes an equality.\\
	If $\mathbf{u}\neq\mathbf{0}$, then consider $W=\mathrm{Span}\{\mathbf{u}\}$ and
	\begin{center}
		$\|\mathrm{Proj}_W\{\mathbf{v}\}\|=\left\|\frac{\left<\mathbf{v},\mathbf{u}\right>}{\|\mathbf{u}\|^2} \mathbf{u}\right\|=
		   \frac{|\left<\mathbf{v},\mathbf{u}\right>|}{\|\mathbf{u}\|^2}\|\mathbf{u}\|=\frac{|\left<\mathbf{v},\mathbf{u}\right>|}{\|\mathbf{u}\|}$
	\end{center}
	But by the Pythagorean Theorem (Theorem \ref{thm:pythagorean2}) we have $\|\mathrm{Proj}_W\{\mathbf{v}\}\|\leq\|\mathbf{v}\|$. Consequently,
	\begin{center}
		$\frac{|\left<\mathbf{v},\mathbf{u}\right>|}{\|\mathbf{u}\|}\leq\|\mathbf{v}\| \Rightarrow |\left<\mathbf{v},\mathbf{u}\right>| \leq \|\mathbf{u}\|\|\mathbf{v}\|$ (q.e.d.)
	\end{center}
\end{ceuthm}
\end{frame}

% ==============================================
\begin{frame}\frametitle{The Triangle inequality} 
\begin{ceuthm}[The Triangle inequality]
  For all $\mathbf{u},\mathbf{v}\in V$ it is verified
	\begin{center}
	  $\|\mathbf{u}+\mathbf{v}\|\leq\|\mathbf{u}\|+\|\mathbf{v}\|$
	\end{center}
	\underline{\textit{Proof}}\\
	\begin{center}
		$\begin{array}{rcll}\|\mathbf{u}+\mathbf{v}\|^2&=&\left<\mathbf{u}+\mathbf{v},\mathbf{u}+\mathbf{v}\right>& \quad\text{[By definition]}\\
		   &=&\left<\mathbf{u},\mathbf{u}\right>+\left<\mathbf{v},\mathbf{v}\right>+2\left<\mathbf{u},\mathbf{v}\right> & \quad\text{[Properties of inner product]}\\
		   &\leq&\|\mathbf{u}\|^2+\|\mathbf{v}\|^2+2|\left<\mathbf{u},\mathbf{v}\right>| & \quad\text{[since $\left<\mathbf{u},\mathbf{v}\right>\leq|\left<\mathbf{u},\mathbf{v}\right>|$]}\\
		   &\leq&\|\mathbf{u}\|^2+\|\mathbf{v}\|^2+2\|\mathbf{u}\|\|\mathbf{v}\| & \quad \text{[Cauchy-Schwarz]}\\
		   &=&(\|\mathbf{u}\|+\|\mathbf{v}\|)^2 & \\
		   &\Rightarrow& & \\
		   \|\mathbf{u}+\mathbf{v}\|&\leq&\|\mathbf{u}\|+\|\mathbf{v}\| & \quad\text{[Taking square root]}\\
		\end{array}$
	\end{center}
	(q.e.d.)
\end{ceuthm}
\end{frame}

% ==============================================
\begin{frame}\frametitle{Exercises} 

\begin{exerciseblock}{Exercises}
	From Lay (3rd ed.), Chapter 6, Section 7:
	\begin{itemize}
		\item 6.7.1
		\item 6.7.13
		\item 6.7.16
		\item 6.7.18
	\end{itemize}
\end{exerciseblock}

\end{frame}

% ==============================================
\subsection{Applications of inner product spaces (d)} 
\Outline

\begin{frame}\frametitle{Weighted Least Squares} 
\begin{block}{Weighted Least Squares}
	Let us assume we have a table of collected data and we want to fit a least-squares model. However, we want to give more importance to 
	some observations, either because we are more confident about them or because they matter more for our purposes. We encode the importance as a weight (the larger the weight,
	the more important the observation)
	\begin{center}
		\begin{tabular}{c|c|c}
			\textbf{X} & \textbf{Y} & \textbf{W} \\
			\hline
			$x_1$ & $y_1$ & $w_1$ \\
			$x_2$ & $y_2$ & $w_2$ \\
			$x_3$ & $y_3$ & $w_3$ \\
			... & ... & ...
		\end{tabular}
	\end{center}
	Let us call $\hat{y}_j$ the prediction of the model for the $j$-th observation and $\epsilon_j$ the corresponding error
	\begin{center}
		$y_j=\hat{y}_j+\epsilon_j$
	\end{center}
\end{block}
\end{frame}

% ==============================================
\begin{frame}\frametitle{Weighted Least Squares} 
\begin{block}{}
	The goal is now to minimize the weighted sum of square errors
	\begin{center}
		$\sum\limits_{j=1}^n{(w_j\epsilon_j)^2}=\sum\limits_{j=1}^n{(w_j(y_j-\hat{y}_j))^2}=\sum\limits_{j=1}^n{(w_jy_j-w_j\hat{y}_j)^2}$
	\end{center}
	Let us collect all observed values into a vector $\mathbf{y}$ and do analogously with the predictions $\hat{\mathbf{y}}$. Let us
	define the diagonal matrix
	\begin{center}
		$W=\begin{pmatrix} w_1 & 0 & 0 & ... & 0 \\ 0 & w_2 & 0 & ... & 0 \\ 0 & 0 & w_3 & ... & 0 \\ ... & ... & ... & ... & ... \\ 0 & 0 & 0 & ... & w_n \end{pmatrix}$
	\end{center}
	Then, the previous objective function becomes
	\begin{center}
		$\sum\limits_{j=1}^n{(w_jy_j-w_j\hat{y}_j)^2}=\|W\mathbf{y}-W\hat{\mathbf{y}}\|^2$
	\end{center}
\end{block}
\end{frame}

% ==============================================
\begin{frame}\frametitle{Weighted Least Squares} 
\begin{block}{}
	Now, suppose that $\hat{\mathbf{y}}$ is calculated from the columns of a matrix $A$, that is, $\hat{\mathbf{y}}=A\mathbf{x}$. The objective function becomes
	\begin{center}
		$\sum\limits_{j=1}^n{(w_jy_j-w_j\hat{y}_j)^2}=\|W\mathbf{y}-WA\mathbf{x}\|^2$
	\end{center}
	The minimum of this objective function is attained at the vector $\hat{\mathbf{x}}$ that is a least-squares solution of the equation system 
	\begin{center}
		$WA\mathbf{x}=W\mathbf{y}$
	\end{center}
	The normal equations of the problem are
	\begin{center}
		$(WA)^TWA\mathbf{x}=(WA)^TW\mathbf{y}$
	\end{center}
	
\end{block}
\end{frame}
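
% ==============================================
\begin{frame}\frametitle{Weighted Least Squares}

\begin{exampleblock}{Example (MATLAB sketch)}
	A minimal MATLAB sketch of weighted least squares for a straight-line fit; the data and weights are made up for illustration:\\
	{\color{blue}\texttt{
x=[1; 2; 3; 4]; y=[1.0; 2.1; 2.9; 4.2]; \% hypothetical data\\
w=[1; 1; 5; 1]; \% hypothetical weights\\
A=[ones(size(x)) x]; W=diag(w);\\
xhat=(W*A)\textbackslash(W*y) \% least-squares solution of WAx=Wy
	}}
\end{exampleblock}

\end{frame}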

% ==============================================
\begin{frame}\frametitle{Weighted Least Squares} 
\begin{exampleblock}{Example}
	In this work they used Weighted Least Squares to calibrate a digital system to measure maximum respiratory pressures.
	\begin{center}
		\includegraphics[scale=0.55]{figWLS.jpg}
	\end{center}
	\begin{tiny}
		J.L. Ferreira, F.H. Vasconcelos, C.J. Tierra-Criollo. \textit{A Case Study of Applying Weighted Least Squares to Calibrate a Digital Maximum Respiratory Pressures Measuring System}.
		Applied Biomedical Engineering, Chapter 18 (\textbf{2011})
	\end{tiny}
	
\end{exampleblock}
\end{frame}

% ==============================================
\begin{frame}\frametitle{Fourier Series} 
\begin{exampleblock}{Example}
	\begin{columns}
		\begin{column}{5cm}
			Fourier analysis is perhaps the most widespread tool for analyzing signals and their frequency components. Fourier decomposition states that any signal can be obtained by
			summing sine waves of different amplitudes, phases and frequencies.
		\end{column}
		\begin{column}{5cm}
			\begin{center}
				\includegraphics[height=6.5cm]{figFourier.jpg}
			\end{center}
		\end{column}
	\end{columns}
\end{exampleblock}
\end{frame}

% ==============================================
\begin{frame}\frametitle{Fourier Series} 
\begin{ceuthm}
	Consider the vector space of continuous functions in the interval $[0,2\pi]$, $C[0,2\pi]$.
  The set
	\begin{center}
		$S=\{1,\cos(t),\sin(t),\cos(2t),\sin(2t),...,\cos(Nt),\sin(Nt)\}$
	\end{center}
	is orthogonal with respect to the inner product defined as
	\begin{center}
		$\left<f(t),g(t)\right>=\int_0^{2\pi}{f(t)g(t)dt}$
	\end{center}
	\underline{\textit{Proof}}\\
	\begin{center}
		$\begin{array}{rcl}
			\left<\cos(nt),\cos(mt)\right>&=&\int_0^{2\pi}{\cos(nt)\cos(mt)dt} \qquad (n\neq m)\\
			   &=&\int_0^{2\pi}{\frac{1}{2}(\cos((n+m)t)+\cos((n-m)t))dt}\\
			   &=&\frac{1}{2}\left.\left(\frac{\sin((n+m)t)}{n+m}+\frac{\sin((n-m)t)}{n-m}\right)\right|_0^{2\pi}\\
			   &=&0 \\
		\end{array}$
	\end{center}
	where we have used $\cos(A)\cos(B)=\frac{1}{2}(\cos(A+B)+\cos(A-B))$.
\end{ceuthm}
\end{frame}

% ==============================================
\begin{frame}\frametitle{Fourier Series} 
\begin{block}{}
  Analogously we could prove that
	\begin{center}
		$\begin{array}{rcl}
			\left<\cos(nt),\sin(mt)\right>&=&0 \\
			\left<\cos(nt),1\right>&=&0 \\
			\left<\sin(nt),1\right>&=&0 \\
			\|\cos(nt)\|^2&=&\pi \\
			\|\sin(nt)\|^2&=&\pi \\
			\|1\|^2&=&2\pi \\
		\end{array}$
	\end{center}
\end{block}
\end{frame}

% ==============================================
\begin{frame}\frametitle{Fourier Series} 
\begin{ceuthm}[Fourier series]
	Given any function $f(t)\in C[0,2\pi]$, $f(t)$ can be approximated as closely as desired (by taking $N$ large enough) by its orthogonal projection onto
	$W=\mathrm{Span}\{S\}$
	\begin{center}
		$f(t)\approx \mathrm{Proj}_W\{f(t)\}=\frac{\left<f(t),1\right>}{\|1\|^2}+\sum\limits_{n=1}^N{\left(\frac{\left<f(t),\cos(nt)\right>}{\|\cos(nt)\|^2}\cos(nt)+
			\frac{\left<f(t),\sin(nt)\right>}{\|\sin(nt)\|^2}\sin(nt)\right)}$
		\includegraphics[scale=0.25]{figFourier2.jpg}
	\end{center}
\end{ceuthm}
\end{frame}
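
% ==============================================
\begin{frame}\frametitle{Fourier Series}

\begin{exampleblock}{Example (MATLAB sketch)}
	A minimal MATLAB sketch of this projection with $N=5$ harmonics, for a hypothetical function $f$ on $[0,2\pi]$ chosen only for illustration:\\
	{\color{blue}\texttt{
f=@(t) t.*(2*pi-t); \% hypothetical function\\
N=5; t=linspace(0,2*pi,500);\\
fp=integral(f,0,2*pi)/(2*pi)*ones(size(t)); \% mean term\\
for n=1:N\\
an=integral(@(s) f(s).*cos(n*s),0,2*pi)/pi; \% cosine coefficient\\
bn=integral(@(s) f(s).*sin(n*s),0,2*pi)/pi; \% sine coefficient\\
fp=fp+an*cos(n*t)+bn*sin(n*t);\\
end\\
plot(t,f(t),t,fp) \% f and its projection onto W
	}}
\end{exampleblock}

\end{frame}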

% ==============================================
\begin{frame}\frametitle{Fourier Series} 
\begin{exampleblock}{Example}
	In this work we used Fourier space to simulate and to align electron microscopy images
	\begin{center}
		\includegraphics[scale=0.3]{figFourier3.jpg}
	\end{center}
	\begin{tiny}
		S. Jonic, C.O.S.Sorzano, P. Thévenaz, C. El-Bez, S. De Carlo, M. Unser. \textit{Spline-Based image-to-volume registration for three-dimensional electron microscopy}. Ultramicroscopy,  103:  303-317 (\textbf{2005}) 
	\end{tiny}
\end{exampleblock}
\end{frame}

% ==============================================
\begin{frame}\frametitle{Exercises} 

\begin{exerciseblock}{Exercises}
	From Lay (3rd ed.), Chapter 6, Section 8:
	\begin{itemize}
		\item 6.8.1
		\item 6.8.6
		\item 6.8.8
		\item 6.8.11
	\end{itemize}
\end{exerciseblock}

\end{frame}


\OutlineFinal

\end{document}